setwd("C:/Users/20621066W/Desktop/T2DM")
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(patchwork)
library(purrr)
library(uwot)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ClusterR)
## Loading required package: gtools
library(splines)
library(flextable)
##
## Attaching package: 'flextable'
## The following object is masked from 'package:purrr':
##
## compose
library(archetypes)
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: nnls
library(survival)
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'ggpubr'
## The following objects are masked from 'package:flextable':
##
## border, font, rotate
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(ggfortify)
library(ranger)
library(stats)
library(broom)
library("viridis")
## Loading required package: viridisLite
library(devtools)
## Loading required package: usethis
# Carreguem les dades
load("bbdd_covar_T2DM.RData")
bbdd_covar <- bbdd_covar_T2DM
bbdd_covar <- bbdd_covar %>%
mutate(obesity_BMI = as.numeric(obesity == 1 | 30 <= BMI),
obesity_BMI = if_else(is.na(obesity_BMI), 0, obesity_BMI),
sex_female = factor(sex_female, labels = c('Men', 'Women')),
obesity_BMI = factor(obesity_BMI, labels = c('No obesity', 'Obesity')),
TimeT2DM = TimeT2DM/365.25) %>%
mutate_at(.vars = c('A10', 'A10A', 'A10B', 'C01', 'C02', 'C03', 'C07', 'C08', 'C09', 'C10', 'M01A'),
function(x) if_else(is.na(x), 0, x))
desc_bbdd <- function(abbdd){
n_sex <- abbdd %>%
count(sex = sex_female)
abbdd %>%
group_by(sex_female) %>%
summarise(name = 'total',
N = as.character(n()),
resum = as.character(n())) %>%
bind_rows(abbdd %>%
select(rowId, sex_female, Current, Former, obesity_BMI, A10A, A10B, C02, C03, C07, C08,
C09, C10, M01A, angor, ami, stroke, tia) %>%
mutate_if(is.numeric, as.character) %>%
# mutate(Current = as.character(Current),
# Former = as.character(Former),
# obesity_BMI = as.character(obesity_BMI)) %>%
pivot_longer(cols = !contains(c('rowId', 'sex_female'))) %>%
group_by(sex_female, name, value) %>%
summarise(N = n(),
.groups = 'keep') %>%
group_by(sex_female, name) %>%
transmute(sex_female,
name,
value,
resum = sprintf('%i (%2.1f%%)', N, N/sum(N)*100)) %>%
filter(value != 0)) %>%
bind_rows(abbdd %>%
select(-Current, -obesity_BMI, -Former, -A10, -A10A, -A10B,
-C02, -C03, -C07, -C08, -C09, -C10, -M01A, -angor, -ami, -stroke, -tia) %>%
pivot_longer(cols = !contains(c('rowId', 'sex_female'))) %>%
group_by(sex_female, name) %>%
summarise(N = sum(!is.na(value)),
mn = mean(value, na.rm = T),
sd = sd(value, na.rm = T),
.groups = 'keep') %>%
transmute(sex_female,
name,
N = sprintf('%i (%2.1f%%)', N, N/n_sex$n[n_sex$sex == sex_female]*100),
resum = sprintf('%2.1f (%2.1f)', mn, sd))) %>%
filter(name != 'NA') %>%
select(sex_female, name, value, N, resum) %>%
pivot_wider(names_from = sex_female,
values_from = c('N', 'resum')) %>%
flextable %>%
autofit
}
#desc_bbdd(abbdd = bbdd_covar %>% select(-obesity, -T2DM))
ggplot(bbdd_covar %>%
select(rowId, age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose,
TimeT2DM, Ferritin, sex_female, obesity_BMI) %>%
pivot_longer(-c(rowId, sex_female, obesity_BMI)) %>%
mutate(name = factor(name,
levels = c('age', 'BMI', 'HbA1c', 'Glucose', 'Leukocytes', 'Monocytes',
'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'TimeT2DM', 'Ferritin'))),
aes(value)) +
geom_histogram(bins = 50) +
facet_grid(sex_female ~ name, scales = "free") +
theme_bw()
## Warning: Removed 59938 rows containing non-finite values (stat_bin).
Eliminem els valors superiors a mn +/- 5*SD
remove_outliers <- function(x){
m <- mean(x, na.rm = TRUE)
s <- sd(x, na.rm = TRUE)
upperbound <- m + (5*s)
lowerbound <- m - (5*s)
ifelse((x > lowerbound) & (x < upperbound), x, NaN)
}
bbdd_covar <- bbdd_covar %>%
group_by(sex_female) %>%
mutate(across(c('HbA1c', 'Leukocytes', 'Monocytes', 'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'Glucose',
'TimeT2DM', 'Ferritin'),
remove_outliers)) %>%
ungroup
ind_cc <- complete.cases(bbdd_covar %>%
select(rowId, age, HbA1c, BMI, TimeT2DM, Ferritin,
Leukocytes, Monocytes, cLDL, cHDL, Tg, DBP, SBP, Glucose))
#, HbA1c
bbdd <- bbdd_covar[ind_cc,] %>%
select(rowId, age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, cHDL, Tg, DBP, SBP, Glucose,
TimeT2DM, sex_female, obesity_BMI, Current, Former, Ferritin,
ami, angor, stroke, tia,
A10, A10A, A10B, C01, C02, C03, C07, C08, C09, C10, M01A)
desc_bbdd(abbdd = bbdd)
name | value | N_Men | N_Women | resum_Men | resum_Women |
total | 1136 | 1500 | 1136 | 1500 | |
A10A | 1 | 147 (12.9%) | 216 (14.4%) | ||
A10B | 1 | 661 (58.2%) | 878 (58.5%) | ||
ami | 1 | 98 (8.6%) | 37 (2.5%) | ||
angor | 1 | 59 (5.2%) | 46 (3.1%) | ||
C02 | 1 | 75 (6.6%) | 75 (5.0%) | ||
C03 | 1 | 531 (46.7%) | 821 (54.7%) | ||
C07 | 1 | 315 (27.7%) | 394 (26.3%) | ||
C08 | 1 | 268 (23.6%) | 357 (23.8%) | ||
C09 | 1 | 687 (60.5%) | 885 (59.0%) | ||
C10 | 1 | 619 (54.5%) | 751 (50.1%) | ||
Current | 1 | 212 (18.7%) | 82 (5.5%) | ||
Former | 1 | 463 (40.8%) | 233 (15.5%) | ||
M01A | 1 | 763 (67.2%) | 1191 (79.4%) | ||
obesity_BMI | No obesity | 586 (51.6%) | 558 (37.2%) | ||
obesity_BMI | Obesity | 550 (48.4%) | 942 (62.8%) | ||
stroke | 1 | 88 (7.7%) | 67 (4.5%) | ||
tia | 1 | 30 (2.6%) | 51 (3.4%) | ||
age | 1136 (100.0%) | 1500 (100.0%) | 67.5 (11.7) | 70.0 (12.3) | |
BMI | 1136 (100.0%) | 1500 (100.0%) | 29.6 (4.5) | 31.7 (6.2) | |
C01 | 1136 (100.0%) | 1500 (100.0%) | 0.4 (0.5) | 0.4 (0.5) | |
cHDL | 1136 (100.0%) | 1500 (100.0%) | 45.5 (12.0) | 52.2 (13.0) | |
cLDL | 1136 (100.0%) | 1500 (100.0%) | 107.7 (33.8) | 115.5 (32.8) | |
DBP | 1136 (100.0%) | 1500 (100.0%) | 75.9 (10.5) | 75.6 (9.9) | |
Ferritin | 1136 (100.0%) | 1500 (100.0%) | 186.9 (184.3) | 95.4 (98.2) | |
Glucose | 1136 (100.0%) | 1500 (100.0%) | 142.5 (44.5) | 137.8 (40.6) | |
HbA1c | 1136 (100.0%) | 1500 (100.0%) | 6.8 (1.4) | 6.7 (1.3) | |
Leukocytes | 1136 (100.0%) | 1500 (100.0%) | 7.5 (2.1) | 7.3 (1.9) | |
Monocytes | 1136 (100.0%) | 1500 (100.0%) | 8.3 (2.2) | 7.5 (1.9) | |
SBP | 1136 (100.0%) | 1500 (100.0%) | 136.0 (16.1) | 135.3 (15.7) | |
Tg | 1136 (100.0%) | 1500 (100.0%) | 149.7 (82.5) | 150.3 (73.5) | |
TimeT2DM | 1136 (100.0%) | 1500 (100.0%) | 4.3 (4.8) | 5.0 (5.5) |
ggplot(bbdd %>%
select(rowId, age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose,
TimeT2DM, Ferritin,
sex_female, obesity_BMI) %>%
pivot_longer(-c(rowId, sex_female, obesity_BMI)) %>%
mutate(name = factor(name,
levels = c('age', 'BMI', 'HbA1c', 'Glucose', 'Leukocytes', 'Monocytes',
'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'TimeT2DM', 'Ferritin'))),
aes(value)) +
geom_histogram(bins = 50) +
facet_grid(sex_female ~ name, scales = "free") +
theme_bw()
bbdd %>%
select(rowId, age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM,
Ferritin,sex_female) %>%
split(f = .$sex_female) %>%
map_dfr(~{.x %>%
select(-rowId, -sex_female) %>%
cor %>%
reshape2::melt()},
.id = "sex") %>%
mutate(rsq = value^2,
to_mark = ifelse(Var1 != Var2 & rsq > .25, "*", NA),
across(c(Var1, Var2), toupper)) %>%
ggplot(aes(Var1, Var2, fill = value)) +
geom_tile() +
geom_text(aes(label = to_mark), na.rm = TRUE) +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1,1)) +
theme_minimal() +
facet_wrap(~sex, nrow = 1) +
labs(x = NULL, y = NULL, fill = "Correlation", caption = "* Squared correlation higher than 0.25")
bbdd_resid <- bbdd %>%
mutate(HTA_med = as.numeric(C02 | C03 | C07 | C08 | C09)) %>%
split(f = .$sex_female) %>%
map(~{
.x %>%
transmute(rowId,
across(c("HbA1c", "Glucose", "Leukocytes", "Monocytes", "cLDL", "cHDL", "Tg",
"DBP", "SBP", 'TimeT2DM', 'Ferritin'),
function(v,...){
dd <- data.frame(vcol = v, a = age, b = BMI, c = Current,
d = Former, a10a = A10A, a10b = A10B, f = C10, g = HTA_med)
knots_age <- quantile(x = dd$age,
probs = c(0.2, 0.4, 0.6, 0.8))
knots_BMI <- quantile(x = dd$BMI,
probs = c(0.2, 0.4, 0.6, 0.8))
lm(vcol ~ bs(a, knots = knots_age) + bs(b, knots = knots_BMI) +
c + a10a + a10b + f + g, data = dd) %>%
resid %>% scale %>% as.vector
}))
})
ggplot(bbdd_resid %>%
bind_rows(.id = "sex_female") %>%
pivot_longer(-c(rowId, sex_female)) %>%
mutate(name = factor(name,
levels = c('age', 'BMI', 'HbA1c', 'Glucose', 'Leukocytes', 'Monocytes',
'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'TimeT2DM', 'Ferritin'))),
aes(value)) +
geom_histogram(bins = 50) +
facet_grid(sex_female ~ name, scales = "free") +
theme_bw()
bbdd_resid_saved <- bbdd_resid
bbdd_resid %>%
map_dfr(~{.x %>%
select(-rowId) %>%
cor %>%
reshape2::melt()},
.id = "sex") %>%
mutate(rsq = value^2,
to_mark = ifelse(Var1 != Var2 & rsq > .25, "*", NA),
across(c(Var1, Var2), toupper)) %>%
ggplot(aes(Var1, Var2, fill = value)) +
geom_tile() +
geom_text(aes(label = to_mark), na.rm = TRUE) +
scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1,1)) +
theme_minimal() +
facet_wrap(~sex, nrow = 1) +
labs(x = NULL, y = NULL, fill = "Correlation", caption = "* Squared correlation higher than 0.25")
Calculem l’UMAP sense estandarditzar. Utilitzem la mètrica Manhattan.
umap_resid <- bbdd_resid %>%
map(~{
n_total <- nrow(.x)
nn <- round(10 + 15 * (log10(n_total) - 3))
# print(names(.x))
umap(X = .x %>% select(-rowId),
n_neighbors = nn,
min_dist = 0,
n_components = 2,
nn_method = "annoy",
approx_pow = TRUE,
n_sgd_threads = "auto",
binary_edge_weights = TRUE,
dens_scale = .5,
ret_extra = c("model", "nn", "fgraph"))
})
umap_embed <- bbdd_resid %>%
map2(umap_resid, ~{
dat1 <- tibble(rowId = .x$rowId)
dat2 <- data.frame(.y$embedding)
bind_cols(dat1, dat2)
})
umap_embed %>%
bind_rows(.id = "sex") %>%
ggplot(aes(X1, X2)) +
geom_point(alpha = .1) +
facet_wrap(~sex, nrow = 1) +
theme_bw() +
labs(x = "UMAP1", y = "UMAP2")
umap_embed %>%
bind_rows(.id = "sex") %>%
ggplot(aes(X1, X2)) +
geom_density_2d_filled() +
facet_wrap(~sex, nrow = 1) +
theme_bw() +
theme(legend.position = "none") +
labs(x = "UMAP1", y = "UMAP2")
umap_embed_saved <- umap_embed
umap_embed %>%
bind_rows(.id = "sex_female") %>%
{
d <- bbdd %>%
select(rowId, sex_female, age, BMI)
inner_join(., d, by = c("rowId", "sex_female"))
} %>%
pivot_longer(-c(X1, X2, sex_female, rowId)) %>%
split(f = .$name) %>%
imap(~{
mp <- median(.x$value)
.x %>%
ggplot(aes(X1, X2)) +
geom_point(aes(color = value)) +
scale_color_gradient2(midpoint = mp) +
facet_wrap(~sex_female, nrow = 1) +
theme_bw() +
labs(x = "UMAP1", y = "UMAP2",
title = ifelse(.y == "bmi", "BMI", stringr::str_to_title(.y)),
color = NULL)
}) %>%
wrap_plots(ncol = 1)
umap_embed %>%
bind_rows(.id = "sex_female") %>%
{
d <- bbdd %>%
mutate(HTA_med = as.numeric(C02 | C03 | C07 | C08 | C09),
obesity_BMI = as.numeric(obesity_BMI == 'Obesity')) %>%
select(rowId, sex_female, obesity_BMI, Current, Former, ami, angor, stroke, tia,
A10A, A10B, HTA_med, C10, M01A)
inner_join(., d, by = c("rowId", "sex_female"))
} %>%
pivot_longer(-c(X1, X2, sex_female, rowId)) %>%
mutate(name = toupper(name),
value = as.factor(value)) %>%
ggplot(aes(X1, X2)) +
geom_point(aes(color = value)) +
facet_grid(sex_female + value ~ name) +
theme_bw() +
theme(panel.grid = element_blank(),
legend.position = "top") +
labs(x = NULL,
y = NULL,
color = "Value")
umap_embed %>%
map2_dfr(bbdd_resid,
inner_join,
by = "rowId",
.id = "sex_female") %>%
tidyr::pivot_longer(-c(X1, X2, sex_female, rowId)) %>%
mutate(name = toupper(name)) %>% #,
ggplot(aes(X1, X2)) +
geom_point(aes(color = value)) +
scale_color_gradient2(limits = c(-10, 10), low = scales::muted("blue"), high = scales::muted("red")) +
facet_grid(sex_female ~ name) +
theme_bw() +
theme(panel.grid = element_blank(),
legend.position = "top") +
labs(x = NULL,
y = NULL,
color = "Value")
umap_embed %>%
map2_dfr(bbdd_resid,
inner_join,
by = "rowId",
.id = "sex_female") %>%
{
d <- bbdd %>%
select(rowId, sex_female, age, BMI)
inner_join(., d, by = c("rowId", "sex_female"))
} %>%
tidyr::pivot_longer(-c(sex_female, rowId, X1, X2)) %>%
group_by(sex_female, name) %>%
group_modify(~{
.x %>%
with(data.frame(umap_axis = c("UMAP1", "UMAP2"),
Correlation = c(cor(X1, value), cor(X2, value))))
}) %>%
mutate(name = toupper(name)) %>%
group_by(sex_female, umap_axis) %>%
group_map(~{.x %>%
ggplot(aes(Correlation, reorder(name, abs(Correlation)))) +
geom_vline(xintercept = 0, lty = "dashed") +
geom_segment(aes(xend = 0, yend = reorder(name, abs(Correlation)))) +
geom_point(color = "red") +
theme_bw() +
labs(x = NULL, y = NULL, title = paste(.y$sex_female, "-", .y$umap_axis))
}) %>%
wrap_plots
umap_fgraphs <- umap_resid %>%
map2(bbdd_resid,
~`dimnames<-`(.x$fgraph, list(.y$rowId, .y$rowId))) %>%
map(~igraph::graph_from_adjacency_matrix(.x, mode = "undirected"))
eigen_res <- umap_fgraphs %>%
map(~igraph::cluster_leading_eigen(.x))
eigen_clus <- eigen_res %>%
map(igraph::membership) %>%
map(~data.frame(rowId = as.numeric(names(.x)), cluster = as.numeric(.x)))
evcent_dat <- eigen_clus %>%
imap(~.x %>%
split(f = .$cluster) %>%
map_dfr(function(CLUS){
igraph::subgraph(umap_fgraphs[[.y]], as.character(CLUS$rowId)) %>%
igraph::evcent() %>%
pluck("vector") %>%
data.frame %>%
tibble::rownames_to_column() %>%
setNames(c("rowId", "evcent_score")) %>%
mutate(rowId = as.numeric(rowId))
},
.id = "cluster") %>%
mutate(cluster = as.numeric(cluster)))
repr_ind <- evcent_dat %>%
map(~.x %>%
group_by(cluster) %>%
slice_max(evcent_score) %>%
ungroup)
arch_mod <- bbdd_resid %>%
imap(~{mat <- select(.x, -rowId)
mat <- as.matrix(mat)
initpoints <- which(.x$rowId %in% repr_ind[[.y]]$rowId)
res <- archetypes::archetypes(
mat,
k = length(initpoints),
family = archetypes::archetypesFamily(initfn = archetypes:::make.fix.initfn(initpoints)),
verbose = FALSE,
saveHistory = FALSE)
return(res)
})
archdat <- arch_mod %>%
map(~data.frame(.x$archetypes), .id = "sex_female")
archdat %>%
map_dfr(~{.x %>%
mutate(arch_number = factor(row_number())) %>%
pivot_longer(-arch_number)
},
.id = "sex_female") %>%
mutate(name = toupper(name)) %>%
ggplot(aes(value, arch_number)) +
geom_vline(xintercept = 0) +
geom_segment(aes(xend = 0, yend = arch_number)) +
geom_point() +
facet_grid(sex_female ~ name, scales = "free_y") +
labs(x = NULL, y = NULL) +
theme_bw()
dic_women <- matrix(ncol=2, nrow=nrow(archdat$Women))
colnames(dic_women) <- c("archnum", "archnam")
for (x in 1: nrow(dic_women)){
dic_women[x,1] = x
dic_women[x,2] = paste("Cluster", x, sep = " ")
}
dic_men <- matrix(ncol=2, nrow=nrow(archdat$Men))
colnames(dic_men) <- c("archnum", "archnam")
for (x in 1: nrow(dic_men)){
dic_men[x,1] = x
dic_men[x,2] = paste("Cluster", x, sep = " ")
}
arch_dict <- list(
Women = as_tibble(dic_women),
Men = as_tibble(dic_men))
arch_labs <- archdat %>%
imap(~{bind_cols(arch_dict[[.y]], .x)})
archdat_umap <- map(
setNames(c("Men", "Women"), c("Men", "Women")), ~{
d <- select(arch_labs[[.x]], -c(archnum, archnam))
res <- umap_transform(d, umap_resid[[.x]])
res <- data.frame(res)
bind_cols(select(arch_labs[[.x]], archnam), res)
})
map(c("Men", "Women"), ~{
umap_embed[[.x]] %>%
ggplot(aes(X1, X2)) +
geom_point(alpha = .1) +
geom_point(data = archdat_umap[[.x]], aes(fill = archnam), shape = 23, size = 3) +
theme_bw() +
labs(x = "UMAP1", y = "UMAP2", title = .x, fill = "Archetypes")
}) %>%
wrap_plots(nrow = 1)
arch_alphas <- arch_mod %>%
map2(arch_labs, ~{
data.frame(.x$alphas) %>%
setNames(.y$archnam)
}) %>%
map2(bbdd_resid, ~{
.y %>%
select(rowId) %>%
bind_cols(.x)
})
arch_alphas_long <- arch_alphas %>%
map(~pivot_longer(.x, -rowId))
arch_alphas_long %>%
bind_rows(.id = "sex_female") %>%
ggplot(aes(name, value)) +
geom_boxplot(aes(group = name, fill = name)) +
theme_bw() +
theme(legend.position = "none") +
facet_wrap(~sex_female, ncol = 1) +
labs(x = NULL, y = "Probability")
arch_maxprob <- arch_alphas_long %>%
map(~{.x %>%
group_by(rowId) %>%
slice_max(value, with_ties = FALSE) %>%
ungroup
})
arch_maxprob %>%
imap(~{.x %>%
inner_join(umap_embed[[.y]], by = "rowId")
}) %>%
bind_rows(.id = "sex_female") %>%
ggplot(aes(X1, X2)) +
geom_point(aes(color = name, alpha = value)) +
guides(color = guide_legend(override.aes = list(shape = 19))) +
scale_alpha_identity() +
facet_wrap(~sex_female, nrow = 1) +
theme_bw() +
labs(x = "UMAP1", y = "UMAP2", color = "Archetypes")
arch_maxprob %>%
imap(~{d <- umap_embed[[.y]]
.x %>%
split(f = .$name) %>%
map(function(ARCH){
dd <- inner_join(ARCH, d, by = "rowId")
d %>%
ggplot(aes(X1, X2)) +
geom_point(alpha = .1) +
geom_point(data = dd, color = "red", alpha = .5, size = .2) +
theme_bw() +
labs(x = NULL, y = NULL, title = ARCH$name[1])
}) %>%
modify_at(1, function(g){ g + labs(y = .y) }) %>%
wrap_plots(nrow = 1)
}) %>%
wrap_plots(nrow = 2)
arch_maxprob %>%
imap(~{.x %>%
filter(value > .5) %>%
inner_join(umap_embed[[.y]], by = "rowId")
}) %>%
bind_rows(.id = "sex_female") %>%
ggplot(aes(X1, X2)) +
geom_point(aes(color = name, alpha = value)) +
guides(color = guide_legend(override.aes = list(shape = 19))) +
scale_alpha_identity() +
facet_wrap(~sex_female, nrow = 1) +
theme_bw() +
labs(x = "UMAP1", y = "UMAP2", color = "Archetypes")
diseases <- bbdd_covar %>% select(rowId, ami, angor, stroke, tia)
umap_disease <- umap_resid %>%
map(pluck, "embedding") %>%
map(data.frame) %>%
map2_dfr(bbdd_resid, bind_cols, .id = "sex") %>%
select(sex, rowId, X1, X2) %>%
left_join(diseases, by = "rowId")
head(umap_disease)
## sex rowId X1 X2 ami angor stroke tia
## 1 Men 3785 0.1613661 0.4415029 0 0 0 0
## 2 Men 4678 0.5333863 -0.6962384 0 0 0 0
## 3 Men 6049 0.9120204 0.2245933 0 0 0 0
## 4 Men 19645 -0.9482439 1.9006338 0 0 1 0
## 5 Men 25515 -0.7372368 -0.8155515 1 0 0 0
## 6 Men 25754 1.8516054 0.5274401 0 0 0 0
options(repr.plot.width = 2, repr.plot.height = 10)
umap_disease %>%
split(f = .$sex) %>%
imap(
~{
.x %>%
tidyr::pivot_longer(-c(rowId, sex, X1, X2), names_to = "Disease", values_to = "Diagnosis") %>%
mutate(Disease = gsub("_", " ", Disease),
Diagnosis = ifelse(Diagnosis == 1, "Yes", "No")) %>%
ggplot(aes(X1, X2)) +
geom_point(aes(color = Diagnosis,
shape = ifelse(Diagnosis == "Yes", "*", "."),
alpha = ifelse(Diagnosis == "Yes", .5, .1)),
na.rm = TRUE) +
scale_shape_identity() +
scale_alpha_identity() +
scale_color_manual(values = c("grey", "red")) +
guides(color = guide_legend(override.aes = list(alpha = 1))) +
geom_point(data = archdat_umap[[.y]], aes(fill = archnam), shape = 23, size = 3) +
facet_wrap(~Disease, nrow = 2) +
theme_bw() +
labs(title = .x$sex[1], x = "UMAP1", y = "UMAP2", fill = paste("Archetypes -", .y))
}
) %>%
patchwork::wrap_plots(ncol = 1, guides = "collect")
arch_disease <- umap_disease %>%
select(-c(X1, X2)) %>%
pivot_longer(-c(sex, rowId), names_to = "Disease", values_to = "Diagnosis") %>%
split(f = .$sex) %>%
imap(~inner_join(.x, arch_alphas_long[[.y]], by = "rowId"))
map(arch_disease, head)
## $Men
## # A tibble: 6 x 6
## sex rowId Disease Diagnosis name value
## <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 Men 3785 ami 0 Cluster 1 0
## 2 Men 3785 ami 0 Cluster 2 0.100
## 3 Men 3785 ami 0 Cluster 3 0.360
## 4 Men 3785 ami 0 Cluster 4 0.322
## 5 Men 3785 ami 0 Cluster 5 0.124
## 6 Men 3785 ami 0 Cluster 6 0.0935
##
## $Women
## # A tibble: 6 x 6
## sex rowId Disease Diagnosis name value
## <chr> <dbl> <chr> <dbl> <chr> <dbl>
## 1 Women 13024 ami 0 Cluster 1 0.0373
## 2 Women 13024 ami 0 Cluster 2 0
## 3 Women 13024 ami 0 Cluster 3 0
## 4 Women 13024 ami 0 Cluster 4 0.105
## 5 Women 13024 ami 0 Cluster 5 0.0833
## 6 Women 13024 ami 0 Cluster 6 0.322
arch_logreg <- arch_disease %>%
map_dfr(
~{
.x %>%
mutate(
prob = ifelse(value == 0, .Machine$double.eps, value)
) %>%
split(f = .$Disease) %>%
map_dfr(
function(DX){
DX %>%
split(f = .$name) %>%
map_dfr(
function(ARCH){
suppressWarnings({
glm(Diagnosis ~ value, data = ARCH, family = binomial) %>%
broom::tidy()
})
},
.id = "Archetype"
)
},
.id = "Disease"
)
},
.id = "sex"
)
head(arch_logreg)
## # A tibble: 6 x 8
## sex Disease Archetype term estimate std.error statistic p.value
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Men ami Cluster 1 (Intercept) -2.23 0.120 -18.6 2.71e-77
## 2 Men ami Cluster 1 value -2.00 1.01 -1.99 4.64e- 2
## 3 Men ami Cluster 2 (Intercept) -2.50 0.146 -17.1 2.20e-65
## 4 Men ami Cluster 2 value 0.870 0.590 1.47 1.40e- 1
## 5 Men ami Cluster 3 (Intercept) -2.04 0.139 -14.7 3.41e-49
## 6 Men ami Cluster 3 value -2.36 0.791 -2.98 2.84e- 3
options(repr.plot.width = 8, repr.plot.height = 20)
arch_logreg %>%
filter(term == "value") %>%
filter(p.value < .1) %>%
mutate(ci = qnorm(1 - .05/2) * std.error,
conf.low = estimate - ci, conf.high = estimate + ci) %>%
ggplot(aes(estimate, interaction(Disease, Archetype))) +
geom_vline(xintercept = 0, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = Archetype)) +
scale_y_discrete(guide = ggh4x::guide_axis_nested()) +
facet_wrap(~sex, nrow = 1, scales = "free") +
theme_bw() +
theme(legend.position = "none") +
labs(title = "Significant cross-sectional associations with diseases\n",
x = "Log OR per 1% increase\nin archetypal probability", y = NULL)
### STROKE
diseases <- bbdd_covar %>% select(rowId, age, BMI,BMI, HbA1c, Glucose, Leukocytes, Monocytes,
cLDL, cHDL, Tg, DBP, SBP, TimeT2DM, Ferritin, t.ep_StrokeI, i.ep_StrokeI,
t.ep_AMI, i.ep_AMI, t.ep_Angor, i.ep_Angor, t.ep_TIA, i.ep_TIA, sex_female, ami, stroke, angor, tia)
diseases <- diseases %>% group_by(sex_female) %>% group_split() %>% map2(arch_maxprob, function(dat, arch){
dat <- arch %>% select(rowId, name) %>% left_join(dat, by="rowId")
})
diseases <- diseases %>% map(function(dat){dat <- dat %>% filter(ami == 0, stroke == 0, angor==0, tia == 0)})
diseases %>% map(function(data){
autoplot(survfit(Surv(t.ep_StrokeI, i.ep_StrokeI) ~ name, data=data), main="Stroke survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
clusters_men <- diseases[[1]]
clusters_men$name <- as.factor(clusters_men$name)
clusters_men$name <- relevel(clusters_men$name, ref= "Cluster 1") #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- diseases[[2]]
clusters_women <- clusters_women %>% filter(name != "Cluster 3")
clusters_women$name <- as.factor(clusters_women$name)
clusters_women$name <- relevel(clusters_women$name, ref = "Cluster 4")
clusters = list(clusters_men, clusters_women)
diseases_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_StrokeI, i.ep_StrokeI)~age + BMI+ name, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(diseases_surv)
## [[1]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.08 0.0164 4.53 0.00000597 1.04 1.11
## 2 BMI 1.00 0.0379 0.126 0.900 0.933 1.08
## 3 nameCluster 2 1.56 1.12 0.400 0.689 0.175 14.0
## 4 nameCluster 3 2.44 1.06 0.839 0.401 0.304 19.6
## 5 nameCluster 4 2.53 1.07 0.866 0.387 0.310 20.6
## 6 nameCluster 5 3.42 1.12 1.10 0.272 0.381 30.7
## 7 nameCluster 6 2.45 1.04 0.859 0.390 0.318 18.8
## 8 nameCluster 7 4.09 1.08 1.30 0.192 0.492 34.0
##
## [[2]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0139 3.37 0.000740 1.02 1.08
## 2 BMI 0.944 0.0263 -2.20 0.0275 0.896 0.994
## 3 nameCluster 1 5.43 0.691 2.45 0.0143 1.40 21.1
## 4 nameCluster 2 4.77 0.645 2.42 0.0154 1.35 16.9
## 5 nameCluster 5 3.87 0.660 2.05 0.0401 1.06 14.1
## 6 nameCluster 6 2.10 0.660 1.12 0.261 0.576 7.64
## 7 nameCluster 7 1.54 0.731 0.594 0.552 0.368 6.47
## 8 nameCluster 8 5.56 0.644 2.66 0.00774 1.57 19.7
options(repr.plot.width = 9, repr.plot.height = 3)
diseases_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
diseases_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
diseases %>% map(function(data){
autoplot(survfit(Surv(t.ep_AMI, i.ep_AMI) ~ name, data=data), main="AMI survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
clusters_men <- diseases[[1]]
clusters_men$name <- as.factor(clusters_men$name)
clusters_men$name <- relevel(clusters_men$name, ref="Cluster 5") #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- diseases[[2]]
clusters_women <- clusters_women %>% filter(name != "Cluster 8", name != "Cluster 3")
clusters_women$name <- as.factor(clusters_women$name)
clusters_women$name <- relevel(clusters_women$name, ref = "Cluster 7")
clusters = list(clusters_men, clusters_women)
diseases_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_AMI, i.ep_AMI)~age + BMI+ name, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(diseases_surv)
## [[1]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0183 2.03 0.0427 1.00 1.08
## 2 BMI 0.946 0.0471 -1.18 0.237 0.863 1.04
## 3 nameCluster 1 2.79 1.23 0.837 0.403 0.252 30.8
## 4 nameCluster 2 2.17 1.12 0.692 0.489 0.242 19.4
## 5 nameCluster 3 2.69 1.07 0.923 0.356 0.330 21.9
## 6 nameCluster 4 3.69 1.06 1.23 0.218 0.461 29.5
## 7 nameCluster 6 1.28 1.10 0.223 0.823 0.149 10.9
## 8 nameCluster 7 2.51 1.15 0.796 0.426 0.261 24.1
##
## [[2]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0256 1.88 0.0602 0.998 1.10
## 2 BMI 1.02 0.0437 0.368 0.713 0.933 1.11
## 3 nameCluster 1 4.67 1.22 1.26 0.208 0.423 51.5
## 4 nameCluster 2 5.39 1.10 1.53 0.125 0.626 46.4
## 5 nameCluster 4 2.24 1.23 0.658 0.511 0.203 24.7
## 6 nameCluster 5 1.14 1.41 0.0917 0.927 0.0711 18.2
## 7 nameCluster 6 4.00 1.08 1.28 0.200 0.480 33.3
options(repr.plot.width = 9, repr.plot.height = 3)
diseases_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
diseases_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 5 rows containing missing values (geom_segment).
diseases %>% map(function(data){
autoplot(survfit(Surv(t.ep_TIA, i.ep_TIA) ~ name, data=data), main="TIA survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
clusters_men <- diseases[[1]]
clusters_men <- clusters_men %>%filter(name != "Cluster 5", name != "Cluster 1")
clusters_men$name <- as.factor(clusters_men$name)
clusters_men$name <- relevel(clusters_men$name, ref="Cluster 2") #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- diseases[[2]]
clusters_women$name <- as.factor(clusters_women$name)
clusters_women$name <- relevel(clusters_women$name, ref = "Cluster 3")
clusters = list(clusters_men, clusters_women)
diseases_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_TIA, i.ep_TIA)~age + BMI+ name, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(diseases_surv)
## [[1]]
## # A tibble: 6 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0219 2.06 0.0395 1.00 1.09
## 2 BMI 0.974 0.0556 -0.466 0.641 0.874 1.09
## 3 nameCluster 3 2.69 0.803 1.23 0.217 0.558 13.0
## 4 nameCluster 4 1.39 0.914 0.362 0.718 0.232 8.34
## 5 nameCluster 6 1.54 0.818 0.527 0.598 0.310 7.64
## 6 nameCluster 7 2.43 0.914 0.970 0.332 0.405 14.6
##
## [[2]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0146 2.62 0.00867 1.01 1.07
## 2 BMI 0.972 0.0279 -1.03 0.301 0.920 1.03
## 3 nameCluster 1 4.32 1.10 1.33 0.183 0.502 37.1
## 4 nameCluster 2 3.80 1.05 1.27 0.206 0.481 30.0
## 5 nameCluster 4 2.10 1.10 0.677 0.498 0.245 18.1
## 6 nameCluster 5 3.18 1.07 1.08 0.280 0.390 26.0
## 7 nameCluster 6 2.86 1.05 1.01 0.314 0.369 22.2
## 8 nameCluster 7 2.23 1.08 0.741 0.459 0.268 18.6
## 9 nameCluster 8 1.96 1.12 0.601 0.548 0.219 17.5
options(repr.plot.width = 9, repr.plot.height = 3)
diseases_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 4 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
diseases_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 7 rows containing missing values (geom_segment).
diseases %>% map(function(data){
autoplot(survfit(Surv(t.ep_Angor, i.ep_Angor) ~ name, data=data), main="Angor survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
clusters_men <- diseases[[1]]
clusters_men <- clusters_men %>% filter (name != "Cluster 7")
clusters_men$name <- as.factor(clusters_men$name)
clusters_men$name <- relevel(clusters_men$name, ref="Cluster 2") #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- diseases[[2]]
clusters_women$name <- as.factor(clusters_women$name)
clusters_women$name <- relevel(clusters_women$name, ref = "Cluster 8")
clusters = list(clusters_men, clusters_women)
diseases_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_Angor, i.ep_Angor)~age + BMI+ name, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(diseases_surv)
## [[1]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0169 1.83 0.0667 0.998 1.07
## 2 BMI 1.08 0.0342 2.25 0.0244 1.01 1.15
## 3 nameCluster 1 1.09 1.23 0.0674 0.946 0.0983 12.0
## 4 nameCluster 3 2.89 0.804 1.32 0.187 0.597 13.9
## 5 nameCluster 4 2.74 0.817 1.23 0.218 0.552 13.6
## 6 nameCluster 5 2.75 0.917 1.10 0.271 0.455 16.6
## 7 nameCluster 6 3.13 0.761 1.50 0.134 0.704 13.9
##
## [[2]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.01 0.0187 0.697 0.486 0.977 1.05
## 2 BMI 0.970 0.0371 -0.827 0.408 0.902 1.04
## 3 nameCluster 1 5.92 1.16 1.54 0.124 0.614 57.1
## 4 nameCluster 2 3.39 1.12 1.09 0.275 0.379 30.4
## 5 nameCluster 3 2.00 1.42 0.489 0.625 0.125 32.0
## 6 nameCluster 4 4.86 1.10 1.44 0.149 0.567 41.7
## 7 nameCluster 5 1.99 1.23 0.561 0.575 0.180 21.9
## 8 nameCluster 6 2.71 1.10 0.909 0.363 0.316 23.2
## 9 nameCluster 7 3.32 1.12 1.07 0.283 0.370 29.8
options(repr.plot.width = 9, repr.plot.height = 3)
diseases_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 5 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
diseases_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 7 rows containing missing values (geom_segment).
men <- rep(c("Male"), nrow(bbdd_resid[[1]]))
list_men <- cbind(bbdd_resid[[1]], men)
names <- colnames(bbdd_resid[[1]])
colnames(list_men) <- c(names, "sex_female")
women <- rep(c("Female"), nrow(bbdd_resid[[2]]))
list_women <- cbind(bbdd_resid[[2]], women)
colnames(list_women) <- c(names, "sex_female")
bbdd_resid <- rbind(list_men, list_women)
BIC_curve <- bbdd_resid %>%
select(-rowId) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map_dfr(function(strat){
res <- Optimal_Clusters_GMM(data = strat %>% select(-sex_female),
max_clusters = 1:20, criterion = "BIC",
em_iter = 10, plot_data = FALSE)
data.frame(sex_female = strat$sex_female[1],
nclus = 1:20,
BIC = as.vector(res))
})
ggplot(BIC_curve,
aes(nclus, BIC)) +
geom_line(group = 1) +
geom_point() +
scale_x_continuous(breaks = 1:20) +
facet_wrap(~ sex_female, scales = "free") +
labs(x = "N clusters") +
theme_bw()
ggplot(data = BIC_curve %>%
mutate(bicgrad = abs(BIC - lag(BIC))) %>%
ungroup %>%
drop_na,
aes(nclus - .5, bicgrad)) +
geom_col(alpha = .3, color = "darkgrey") +
geom_line(group = 1) +
geom_point() +
scale_x_continuous(breaks = 1:20) +
facet_wrap(~ sex_female, scales = "free") +
labs(x = "N clusters", y = "BIC change") +
theme_bw()
fxclus <- function(x, k){
mod <- GMM(data = x, gaussian_comps = k, em_iter = 10)
pred <- predict_GMM(x, CENTROIDS = mod$centroids,
COVARIANCE = mod$covariance_matrices,
WEIGHTS = mod$weights)
return(list(cluster = as.integer(pred$cluster_labels)))
}
res_sil <- bbdd_resid %>%
select(-rowId) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map_dfr(function(strat){
map_dfr(2:10, function(k){
map_dfr(1:100, function(iter){
set.seed(iter)
scaledat <- strat %>%
select(-sex_female) %>%
slice_sample(n = 1000)
d <- dist(as.matrix(scaledat))
res <- fxclus(scaledat, k)
ave_sil <- cluster::silhouette(res$cluster, d)
data.frame(nclus = k, mean_sil_width = mean(ave_sil[,"sil_width"]))
})
})
},
.id = "sex_female")
res_sil %>%
group_by(sex_female, nclus) %>%
summarise(mean_sw = mean(mean_sil_width),
se_sw = sd(mean_sil_width)/(sqrt(length(mean_sil_width))),
.groups = "drop") %>%
ggplot(aes(nclus, mean_sw)) +
geom_line(group = 1) +
geom_linerange(aes(ymin = mean_sw - se_sw, ymax = mean_sw + se_sw)) +
geom_point() +
scale_x_continuous(breaks = 1:10) +
facet_wrap(~ sex_female, nrow = 1, scales = "free") +
theme_bw() +
labs(x = "Number of clusters", y = "Mean silhouette width")
bbdd_resid %>%
select(-rowId) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map_dfr(function(strat){
map_dfr(2:20, function(nclus){
scaledat <- strat %>%
select(-sex_female)
data.frame(cluster = fxclus(scaledat, nclus)$cluster) %>%
count(cluster) %>%
mutate(n = 100 * (n/sum(n))) %>%
slice_min(n, with_ties = FALSE) %>%
mutate(nclus)
})
},
.id = 'sex_female') %>%
ggplot(aes(nclus, n)) +
geom_line(group = 1) +
geom_point() +
geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
facet_wrap(~sex_female, nrow = 1) +
theme_bw() +
labs(x = "Number of clusters", y = "Size of smallest cluster (%)")
umap_embed
## $Men
## # A tibble: 1,136 x 3
## rowId X1 X2
## <dbl> <dbl> <dbl>
## 1 3785 0.161 0.442
## 2 4678 0.533 -0.696
## 3 6049 0.912 0.225
## 4 19645 -0.948 1.90
## 5 25515 -0.737 -0.816
## 6 25754 1.85 0.527
## 7 27214 -0.646 0.937
## 8 27492 0.229 0.172
## 9 30046 -0.656 1.56
## 10 35119 0.0179 0.0525
## # ... with 1,126 more rows
##
## $Women
## # A tibble: 1,500 x 3
## rowId X1 X2
## <dbl> <dbl> <dbl>
## 1 13024 -0.795 0.638
## 2 16592 0.596 1.66
## 3 18652 -0.0850 1.09
## 4 20405 0.0744 0.920
## 5 22298 0.391 1.26
## 6 37238 0.358 0.794
## 7 46148 -0.763 1.09
## 8 46159 0.617 -0.0384
## 9 49669 -1.01 0.960
## 10 54634 -1.17 1.07
## # ... with 1,490 more rows
bbdd_umap_50_resid <- umap_embed
names(bbdd_umap_50_resid) <- c("Men", "Women")
men <- matrix(data= c("Men"), nrow=nrow(bbdd_umap_50_resid$Men))
colnames(men) =c("sex")
bbdd_umap_50_resid_men <- cbind(bbdd_umap_50_resid$Men, men)
women <- matrix(data= c("Women"), nrow=nrow(bbdd_umap_50_resid$Women))
colnames(women) =c("sex")
bbdd_umap_50_resid_women <- cbind(bbdd_umap_50_resid$Women, women)
bbdd_umap_50_resid <- rbind(bbdd_umap_50_resid_women, bbdd_umap_50_resid_men)
bbdd_umap_50_resid_saved <- bbdd_umap_50_resid
bbdd_umap_50_resid <- bbdd_umap_50_resid_saved
set.seed(12345)
gmm_res <- bbdd_resid %>%
select(-rowId) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map(function(strat){
scaledat <- strat %>%
select(-sex_female)
mod <- GMM(data = scaledat,
gaussian_comps = 6,
em_iter = 10)
pred <- predict_GMM(scaledat,
CENTROIDS = mod$centroids,
COVARIANCE = mod$covariance_matrices,
WEIGHTS = mod$weights)
return(list(mod = mod,
pred = pred,
grp = strat$sex_female[1]))
})
gmm_res <- list(gmm_res[[2]], gmm_res[[1]])
bbdd_umap_50_resid <- bbdd_umap_50_resid %>%
group_by(sex, .add = T) %>%
group_split() %>%
map2_dfr(gmm_res,
~.x %>%
bind_cols(cluster_gmm = as.factor(.y$pred$cluster_labels)))
ggplot(bbdd_umap_50_resid) +
geom_point(aes(x = X1, y = X2, colour = cluster_gmm)) +
facet_grid(. ~ sex) +
theme_bw()
gmm_res %>%
map(
function(strata){
data.frame(MaxProb = apply(strata$pred$cluster_proba, 1, max)) %>%
ggplot(aes(MaxProb)) +
geom_histogram(bins = 10, fill = "lightblue", color = "black")
}
) %>%
wrap_plots &
theme_bw() &
labs(y = "Count")
gmm_res %>%
map_dfr(~data.frame(prob80 = apply(.x$pred$cluster_proba, 1, max) > .8), .id = "sex") %>%
ggplot(aes(sex, fill = prob80)) +
geom_bar(stat="count", position ="fill") +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
labs(x = NULL, y = "%", fill = "MaxP > 80%")
gmm_res %>%
map_dfr(~data.frame(cluster = .x$pred$cluster_labels) %>%
count(cluster, name = "size") %>%
mutate(prop = size / sum(size),
sex = .x$grp)) %>%
ggplot(aes(cluster, prop * 100)) +
geom_col(fill = "lightblue", color = "black") +
facet_wrap(~sex) +
theme_bw() +
labs(x = "Cluster", y = "%")
gmm_res %>%
map2(bbdd %>%
select(HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split(),
function(strata, data){
varnames <- names(select(data, -c(sex_female)))
centroid_dat <- strata$mod$centroids %>%
data.frame %>%
setNames(paste0(varnames, "_mean"))
sd_dat <- strata$mod$covariance_matrices %>%
sqrt %>%
data.frame %>%
setNames(paste0(varnames, "_sd"))
cbind(centroid_dat, sd_dat) %>%
tibble::rownames_to_column("component") %>%
pivot_longer(-component, names_sep = "_", names_to = c("name", ".value")) %>%
mutate(sd = qnorm(1 - (0.05/2)) * sd) %>%
ggplot(aes(mean, component)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = c(-1, 1) * qnorm(1 - .2/2), lty = "dashed", color = "darkred") +
geom_linerange(aes(xmin = mean - sd, xmax = mean + sd)) +
geom_point(size = 2) +
facet_wrap(~name, ncol = 3, scales = "free_x") +
labs(title = strata$grp, x = NULL, y = NULL) +
theme_bw()
}) %>%
wrap_plots(nrow = 1)
bbdd %>%
select(age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map2(bbdd_umap_50_resid %>%
group_by(sex_female = sex, .add = T) %>%
group_split(),
~.x %>%
bind_cols(.y %>% select(-sex_female, -X1, -X2)) %>%
pivot_longer(-c(cluster_gmm, sex, sex_female)) %>%
group_by(name) %>%
mutate(mval = mean(value)) %>%
ungroup %>%
mutate(name = factor(name,
levels = c('age', 'BMI', 'HbA1c', 'Glucose', 'Leukocytes', 'Monocytes',
'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'TimeT2DM', 'Ferritin'))) %>%
ggplot(aes(cluster_gmm, value)) +
geom_boxplot(aes(group = cluster_gmm, colour = cluster_gmm)) +
geom_hline(aes(yintercept = mval), linetype = "dashed", color = "red") +
facet_wrap(~name, scales = "free", nrow = 1) +
theme_bw() +
theme(legend.position = 'none') +
labs(title = .x$sex[1], x = NULL, y = NULL)) %>%
modify_at(2, ~.x + labs(x = "Cluster")) %>%
wrap_plots(ncol = 1)
## Warning: Unknown or uninitialised column: `sex`.
## Unknown or uninitialised column: `sex`.
gmm_6 <- gmm_res
bbdd_umap_50_resid <- bbdd_umap_50_resid_saved
set.seed(12345)
gmm_res <- bbdd_resid %>%
select(-rowId) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map(function(strat){
scaledat <- strat %>%
select(-sex_female)
mod <- GMM(data = scaledat,
gaussian_comps = 7,
em_iter = 10)
pred <- predict_GMM(scaledat,
CENTROIDS = mod$centroids,
COVARIANCE = mod$covariance_matrices,
WEIGHTS = mod$weights)
return(list(mod = mod,
pred = pred,
grp = strat$sex_female[1]))
})
gmm_res <- list(gmm_res[[2]], gmm_res[[1]])
bbdd_umap_50_resid <- bbdd_umap_50_resid %>%
group_by(sex, .add = T) %>%
group_split() %>%
map2_dfr(gmm_res,
~.x %>%
bind_cols(cluster_gmm = as.factor(.y$pred$cluster_labels)))
ggplot(bbdd_umap_50_resid) +
geom_point(aes(x = X1, y = X2, colour = cluster_gmm)) +
facet_grid(. ~ sex) +
theme_bw()
gmm_res %>%
map(
function(strata){
data.frame(MaxProb = apply(strata$pred$cluster_proba, 1, max)) %>%
ggplot(aes(MaxProb)) +
geom_histogram(bins = 10, fill = "lightblue", color = "black")
}
) %>%
wrap_plots &
theme_bw() &
labs(y = "Count")
gmm_res %>%
map_dfr(~data.frame(prob80 = apply(.x$pred$cluster_proba, 1, max) > .8), .id = "sex") %>%
ggplot(aes(sex, fill = prob80)) +
geom_bar(stat="count", position ="fill") +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
labs(x = NULL, y = "%", fill = "MaxP > 80%")
gmm_res %>%
map_dfr(~data.frame(cluster = .x$pred$cluster_labels) %>%
count(cluster, name = "size") %>%
mutate(prop = size / sum(size),
sex = .x$grp)) %>%
ggplot(aes(cluster, prop * 100)) +
geom_col(fill = "lightblue", color = "black") +
facet_wrap(~sex) +
theme_bw() +
labs(x = "Cluster", y = "%")
gmm_res %>%
map2(bbdd %>%
select(HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split(),
function(strata, data){
varnames <- names(select(data, -c(sex_female)))
centroid_dat <- strata$mod$centroids %>%
data.frame %>%
setNames(paste0(varnames, "_mean"))
sd_dat <- strata$mod$covariance_matrices %>%
sqrt %>%
data.frame %>%
setNames(paste0(varnames, "_sd"))
cbind(centroid_dat, sd_dat) %>%
tibble::rownames_to_column("component") %>%
pivot_longer(-component, names_sep = "_", names_to = c("name", ".value")) %>%
mutate(sd = qnorm(1 - (0.05/2)) * sd) %>%
ggplot(aes(mean, component)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = c(-1, 1) * qnorm(1 - .2/2), lty = "dashed", color = "darkred") +
geom_linerange(aes(xmin = mean - sd, xmax = mean + sd)) +
geom_point(size = 2) +
facet_wrap(~name, ncol = 3, scales = "free_x") +
labs(title = strata$grp, x = NULL, y = NULL) +
theme_bw()
}) %>%
wrap_plots(nrow = 1)
bbdd %>%
select(age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map2(bbdd_umap_50_resid %>%
group_by(sex_female = sex, .add = T) %>%
group_split(),
~.x %>%
bind_cols(.y %>% select(-sex_female, -X1, -X2)) %>%
pivot_longer(-c(cluster_gmm, sex, sex_female)) %>%
group_by(name) %>%
mutate(mval = mean(value)) %>%
ungroup %>%
mutate(name = factor(name,
levels = c('age', 'BMI', 'HbA1c', 'Glucose', 'Leukocytes', 'Monocytes',
'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'TimeT2DM', 'Ferritin'))) %>%
ggplot(aes(cluster_gmm, value)) +
geom_boxplot(aes(group = cluster_gmm, colour = cluster_gmm)) +
geom_hline(aes(yintercept = mval), linetype = "dashed", color = "red") +
facet_wrap(~name, scales = "free", nrow = 1) +
theme_bw() +
theme(legend.position = 'none') +
labs(title = .x$sex[1], x = NULL, y = NULL)) %>%
modify_at(2, ~.x + labs(x = "Cluster")) %>%
wrap_plots(ncol = 1)
## Warning: Unknown or uninitialised column: `sex`.
## Unknown or uninitialised column: `sex`.
gmm_7 <- gmm_res
gmm_res <- gmm_7
bbdd_umap_50_resid <- bbdd_umap_50_resid_saved
set.seed(12345)
gmm_res <- bbdd_resid %>%
select(-rowId) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map(function(strat){
scaledat <- strat %>%
select(-sex_female)
mod <- GMM(data = scaledat,
gaussian_comps = 8,
em_iter = 10)
pred <- predict_GMM(scaledat,
CENTROIDS = mod$centroids,
COVARIANCE = mod$covariance_matrices,
WEIGHTS = mod$weights)
return(list(mod = mod,
pred = pred,
grp = strat$sex_female[1]))
})
gmm_res <- list(gmm_res[[2]], gmm_res[[1]])
bbdd_umap_50_resid <- bbdd_umap_50_resid %>%
group_by(sex, .add = T) %>%
group_split() %>%
map2_dfr(gmm_res,
~.x %>%
bind_cols(cluster_gmm = as.factor(.y$pred$cluster_labels)))
ggplot(bbdd_umap_50_resid) +
geom_point(aes(x = X1, y = X2, colour = cluster_gmm)) +
facet_grid(. ~ sex) +
theme_bw()
gmm_res %>%
map(
function(strata){
data.frame(MaxProb = apply(strata$pred$cluster_proba, 1, max)) %>%
ggplot(aes(MaxProb)) +
geom_histogram(bins = 10, fill = "lightblue", color = "black")
}
) %>%
wrap_plots &
theme_bw() &
labs(y = "Count")
gmm_res %>%
map_dfr(~data.frame(prob80 = apply(.x$pred$cluster_proba, 1, max) > .8), .id = "sex") %>%
ggplot(aes(sex, fill = prob80)) +
geom_bar(stat="count", position ="fill") +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
labs(x = NULL, y = "%", fill = "MaxP > 80%")
gmm_res %>%
map_dfr(~data.frame(cluster = .x$pred$cluster_labels) %>%
count(cluster, name = "size") %>%
mutate(prop = size / sum(size),
sex = .x$grp)) %>%
ggplot(aes(cluster, prop * 100)) +
geom_col(fill = "lightblue", color = "black") +
facet_wrap(~sex) +
theme_bw() +
labs(x = "Cluster", y = "%")
gmm_res %>%
map2(bbdd %>%
select(HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split(),
function(strata, data){
varnames <- names(select(data, -c(sex_female)))
centroid_dat <- strata$mod$centroids %>%
data.frame %>%
setNames(paste0(varnames, "_mean"))
sd_dat <- strata$mod$covariance_matrices %>%
sqrt %>%
data.frame %>%
setNames(paste0(varnames, "_sd"))
cbind(centroid_dat, sd_dat) %>%
tibble::rownames_to_column("component") %>%
pivot_longer(-component, names_sep = "_", names_to = c("name", ".value")) %>%
mutate(sd = qnorm(1 - (0.05/2)) * sd) %>%
ggplot(aes(mean, component)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = c(-1, 1) * qnorm(1 - .2/2), lty = "dashed", color = "darkred") +
geom_linerange(aes(xmin = mean - sd, xmax = mean + sd)) +
geom_point(size = 2) +
facet_wrap(~name, ncol = 3, scales = "free_x") +
labs(title = strata$grp, x = NULL, y = NULL) +
theme_bw()
}) %>%
wrap_plots(nrow = 1)
bbdd %>%
select(age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map2(bbdd_umap_50_resid %>%
group_by(sex_female = sex, .add = T) %>%
group_split(),
~.x %>%
bind_cols(.y %>% select(-sex_female, -X1, -X2)) %>%
pivot_longer(-c(cluster_gmm, sex, sex_female)) %>%
group_by(name) %>%
mutate(mval = mean(value)) %>%
ungroup %>%
mutate(name = factor(name,
levels = c('age', 'BMI', 'HbA1c', 'Glucose', 'Leukocytes', 'Monocytes',
'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'TimeT2DM', 'Ferritin'))) %>%
ggplot(aes(cluster_gmm, value)) +
geom_boxplot(aes(group = cluster_gmm, colour = cluster_gmm)) +
geom_hline(aes(yintercept = mval), linetype = "dashed", color = "red") +
facet_wrap(~name, scales = "free", nrow = 1) +
theme_bw() +
theme(legend.position = 'none') +
labs(title = .x$sex[1], x = NULL, y = NULL)) %>%
modify_at(2, ~.x + labs(x = "Cluster")) %>%
wrap_plots(ncol = 1)
## Warning: Unknown or uninitialised column: `sex`.
## Unknown or uninitialised column: `sex`.
gmm_8 <- gmm_res
bbdd_umap_50_resid <- bbdd_umap_50_resid_saved
set.seed(12345)
gmm_res <- bbdd_resid %>%
select(-rowId) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map(function(strat){
scaledat <- strat %>%
select(-sex_female)
mod <- GMM(data = scaledat,
gaussian_comps = 9,
em_iter = 10)
pred <- predict_GMM(scaledat,
CENTROIDS = mod$centroids,
COVARIANCE = mod$covariance_matrices,
WEIGHTS = mod$weights)
return(list(mod = mod,
pred = pred,
grp = strat$sex_female[1]))
})
gmm_res <- list(gmm_res[[2]], gmm_res[[1]])
bbdd_umap_50_resid <- bbdd_umap_50_resid %>%
group_by(sex, .add = T) %>%
group_split() %>%
map2_dfr(gmm_res,
~.x %>%
bind_cols(cluster_gmm = as.factor(.y$pred$cluster_labels)))
ggplot(bbdd_umap_50_resid) +
geom_point(aes(x = X1, y = X2, colour = cluster_gmm)) +
facet_grid(. ~ sex) +
theme_bw()
gmm_res %>%
map(
function(strata){
data.frame(MaxProb = apply(strata$pred$cluster_proba, 1, max)) %>%
ggplot(aes(MaxProb)) +
geom_histogram(bins = 10, fill = "lightblue", color = "black")
}
) %>%
wrap_plots &
theme_bw() &
labs(y = "Count")
gmm_res %>%
map_dfr(~data.frame(prob80 = apply(.x$pred$cluster_proba, 1, max) > .8), .id = "sex") %>%
ggplot(aes(sex, fill = prob80)) +
geom_bar(stat="count", position ="fill") +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
labs(x = NULL, y = "%", fill = "MaxP > 80%")
gmm_res %>%
map_dfr(~data.frame(cluster = .x$pred$cluster_labels) %>%
count(cluster, name = "size") %>%
mutate(prop = size / sum(size),
sex = .x$grp)) %>%
ggplot(aes(cluster, prop * 100)) +
geom_col(fill = "lightblue", color = "black") +
facet_wrap(~sex) +
theme_bw() +
labs(x = "Cluster", y = "%")
gmm_res %>%
map2(bbdd %>%
select(HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split(),
function(strata, data){
varnames <- names(select(data, -c(sex_female)))
centroid_dat <- strata$mod$centroids %>%
data.frame %>%
setNames(paste0(varnames, "_mean"))
sd_dat <- strata$mod$covariance_matrices %>%
sqrt %>%
data.frame %>%
setNames(paste0(varnames, "_sd"))
cbind(centroid_dat, sd_dat) %>%
tibble::rownames_to_column("component") %>%
pivot_longer(-component, names_sep = "_", names_to = c("name", ".value")) %>%
mutate(sd = qnorm(1 - (0.05/2)) * sd) %>%
ggplot(aes(mean, component)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = c(-1, 1) * qnorm(1 - .2/2), lty = "dashed", color = "darkred") +
geom_linerange(aes(xmin = mean - sd, xmax = mean + sd)) +
geom_point(size = 2) +
facet_wrap(~name, ncol = 3, scales = "free_x") +
labs(title = strata$grp, x = NULL, y = NULL) +
theme_bw()
}) %>%
wrap_plots(nrow = 1)
bbdd %>%
select(age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map2(bbdd_umap_50_resid %>%
group_by(sex_female = sex, .add = T) %>%
group_split(),
~.x %>%
bind_cols(.y %>% select(-sex_female, -X1, -X2)) %>%
pivot_longer(-c(cluster_gmm, sex, sex_female)) %>%
group_by(name) %>%
mutate(mval = mean(value)) %>%
ungroup %>%
mutate(name = factor(name,
levels = c('age', 'BMI', 'HbA1c', 'Glucose', 'Leukocytes', 'Monocytes',
'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'TimeT2DM', 'Ferritin'))) %>%
ggplot(aes(cluster_gmm, value)) +
geom_boxplot(aes(group = cluster_gmm, colour = cluster_gmm)) +
geom_hline(aes(yintercept = mval), linetype = "dashed", color = "red") +
facet_wrap(~name, scales = "free", nrow = 1) +
theme_bw() +
theme(legend.position = 'none') +
labs(title = .x$sex[1], x = NULL, y = NULL)) %>%
modify_at(2, ~.x + labs(x = "Cluster")) %>%
wrap_plots(ncol = 1)
## Warning: Unknown or uninitialised column: `sex`.
## Unknown or uninitialised column: `sex`.
gmm_9 <- gmm_res
10 clusters
bbdd_umap_50_resid <- bbdd_umap_50_resid_saved
set.seed(12345)
gmm_res <- bbdd_resid %>%
select(-rowId) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map(function(strat){
scaledat <- strat %>%
select(-sex_female)
mod <- GMM(data = scaledat,
gaussian_comps = 10,
em_iter = 10)
pred <- predict_GMM(scaledat,
CENTROIDS = mod$centroids,
COVARIANCE = mod$covariance_matrices,
WEIGHTS = mod$weights)
return(list(mod = mod,
pred = pred,
grp = strat$sex_female[1]))
})
gmm_res <- list(gmm_res[[2]], gmm_res[[1]])
bbdd_umap_50_resid <- bbdd_umap_50_resid %>%
group_by(sex, .add = T) %>%
group_split() %>%
map2_dfr(gmm_res,
~.x %>%
bind_cols(cluster_gmm = as.factor(.y$pred$cluster_labels)))
ggplot(bbdd_umap_50_resid) +
geom_point(aes(x = X1, y = X2, colour = cluster_gmm)) +
facet_grid(. ~ sex) +
theme_bw()
gmm_res %>%
map(
function(strata){
data.frame(MaxProb = apply(strata$pred$cluster_proba, 1, max)) %>%
ggplot(aes(MaxProb)) +
geom_histogram(bins = 10, fill = "lightblue", color = "black")
}
) %>%
wrap_plots &
theme_bw() &
labs(y = "Count")
gmm_res %>%
map_dfr(~data.frame(prob80 = apply(.x$pred$cluster_proba, 1, max) > .8), .id = "sex") %>%
ggplot(aes(sex, fill = prob80)) +
geom_bar(stat="count", position ="fill") +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
labs(x = NULL, y = "%", fill = "MaxP > 80%")
gmm_res %>%
map_dfr(~data.frame(cluster = .x$pred$cluster_labels) %>%
count(cluster, name = "size") %>%
mutate(prop = size / sum(size),
sex = .x$grp)) %>%
ggplot(aes(cluster, prop * 100)) +
geom_col(fill = "lightblue", color = "black") +
facet_wrap(~sex) +
theme_bw() +
labs(x = "Cluster", y = "%")
gmm_res %>%
map2(bbdd %>%
select(HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split(),
function(strata, data){
varnames <- names(select(data, -c(sex_female)))
centroid_dat <- strata$mod$centroids %>%
data.frame %>%
setNames(paste0(varnames, "_mean"))
sd_dat <- strata$mod$covariance_matrices %>%
sqrt %>%
data.frame %>%
setNames(paste0(varnames, "_sd"))
cbind(centroid_dat, sd_dat) %>%
tibble::rownames_to_column("component") %>%
pivot_longer(-component, names_sep = "_", names_to = c("name", ".value")) %>%
mutate(sd = qnorm(1 - (0.05/2)) * sd) %>%
ggplot(aes(mean, component)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray") +
geom_vline(xintercept = c(-1, 1) * qnorm(1 - .2/2), lty = "dashed", color = "darkred") +
geom_linerange(aes(xmin = mean - sd, xmax = mean + sd)) +
geom_point(size = 2) +
facet_wrap(~name, ncol = 3, scales = "free_x") +
labs(title = strata$grp, x = NULL, y = NULL) +
theme_bw()
}) %>%
wrap_plots(nrow = 1)
bbdd %>%
select(age, BMI, HbA1c, Leukocytes, Monocytes, cLDL, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin,
sex_female) %>%
group_by(sex_female, .add = T) %>%
group_split() %>%
map2(bbdd_umap_50_resid %>%
group_by(sex_female = sex, .add = T) %>%
group_split(),
~.x %>%
bind_cols(.y %>% select(-sex_female, -X1, -X2)) %>%
pivot_longer(-c(cluster_gmm, sex, sex_female)) %>%
group_by(name) %>%
mutate(mval = mean(value)) %>%
ungroup %>%
mutate(name = factor(name,
levels = c('age', 'BMI', 'HbA1c', 'Glucose', 'Leukocytes', 'Monocytes',
'cLDL', 'cHDL', 'Tg', 'DBP', 'SBP', 'TimeT2DM', 'Ferritin'))) %>%
ggplot(aes(cluster_gmm, value)) +
geom_boxplot(aes(group = cluster_gmm, colour = cluster_gmm)) +
geom_hline(aes(yintercept = mval), linetype = "dashed", color = "red") +
facet_wrap(~name, scales = "free", nrow = 1) +
theme_bw() +
theme(legend.position = 'none') +
labs(title = .x$sex[1], x = NULL, y = NULL)) %>%
modify_at(2, ~.x + labs(x = "Cluster")) %>%
wrap_plots(ncol = 1)
## Warning: Unknown or uninitialised column: `sex`.
## Unknown or uninitialised column: `sex`.
gmm10 <- gmm_res
create_bbdd_clusters <- function(gmm_res){
gmm_res <- list(gmm_res[[2]], gmm_res[[1]])
bbdd_resid_sex <- bbdd_resid %>% group_by(sex_female) %>% group_split()
bbdd_resid_clusters <- list()
bbdd_resid_clusters[[2]] <- cbind(bbdd_resid_sex[[1]], gmm_res[[1]]$pred$cluster_labels)
bbdd_resid_clusters[[1]] <- cbind(bbdd_resid_sex[[2]], gmm_res[[2]]$pred$cluster_labels)
bbdd_covar_sex <- bbdd_covar %>% group_by(sex_female) %>% group_split()
bbdd_num <- list()
bbdd_num[[1]] <- bbdd_covar_sex[[1]][which(bbdd_covar_sex[[1]]$rowId %in% bbdd_resid_clusters[[1]]$rowId),]
bbdd_num[[2]] <- bbdd_covar_sex[[2]][which(bbdd_covar_sex[[2]]$rowId %in% bbdd_resid_clusters[[2]]$rowId),]
n <- colnames(bbdd_num[[1]])
bbdd_clusters <- list()
bbdd_clusters[[1]] <- cbind(bbdd_num[[1]], gmm_res[[2]]$pred$cluster_labels)
bbdd_clusters[[2]] <- cbind(bbdd_num[[2]], gmm_res[[1]]$pred$cluster_labels)
colnames(bbdd_clusters[[1]]) <- c(n, "clusters")
colnames(bbdd_clusters[[2]]) <- c(n, "clusters")
bbdd_clusters[[1]] <- bbdd_clusters[[1]] %>% filter(ami == 0, stroke == 0, angor==0, tia == 0)
bbdd_clusters[[2]] <- bbdd_clusters[[2]] %>% filter(ami == 0, stroke == 0, angor==0, tia == 0)
bbdd_clusters <- rbind(bbdd_clusters[[1]], bbdd_clusters[[2]])
bbdd_clusters <- bbdd_clusters %>% select(rowId, age, BMI, HbA1c, Leukocytes, Monocytes, cLDL,
sex_female, Tg, cHDL, DBP, SBP, Glucose, TimeT2DM, Ferritin, clusters,
t.ep_StrokeI, i.ep_StrokeI, stroke, t.ep_TIA, i.ep_TIA, tia,
t.ep_Angor, i.ep_Angor, angor, t.ep_AMI, i.ep_AMI, ami)
bbdd_clusters$clusters <- as.factor(bbdd_clusters$clusters)
return(bbdd_clusters)
}
bbdd_clusters_6 <- create_bbdd_clusters(gmm_6)
head(bbdd_clusters_6)
## rowId age BMI HbA1c Leukocytes Monocytes cLDL sex_female Tg cHDL DBP SBP
## 1 3785 61 19.27 6.5 6.6 8.8 123 Men 87 75 80 150
## 2 4678 67 24.71 7.7 9.5 5.4 83 Men 62 60 60 110
## 3 6049 56 34.32 7.9 7.3 5.6 110 Men 74 52 84 123
## 4 25754 70 30.11 10.2 8.1 7.0 220 Men 168 47 72 130
## 5 27214 71 24.91 5.7 3.5 7.1 122 Men 214 58 60 120
## 6 30046 71 33.96 6.5 6.6 11.5 107 Men 160 48 75 145
## Glucose TimeT2DM Ferritin clusters t.ep_StrokeI i.ep_StrokeI stroke t.ep_TIA
## 1 136 10.023272 100.7 3 4199 0 0 4199
## 2 227 8.996578 154.8 5 3246 0 0 3246
## 3 150 0.000000 297.9 1 481 0 0 481
## 4 278 0.000000 165.0 6 1411 0 0 1411
## 5 109 4.818617 279.1 4 4199 0 0 4199
## 6 105 0.000000 579.0 4 2352 0 0 2352
## i.ep_TIA tia t.ep_Angor i.ep_Angor angor t.ep_AMI i.ep_AMI ami
## 1 0 0 4199 0 0 4199 0 0
## 2 0 0 3246 0 0 3246 0 0
## 3 0 0 481 0 0 481 0 0
## 4 0 0 1411 0 0 1411 0 0
## 5 0 0 4199 0 0 4199 0 0
## 6 0 0 2352 0 0 2352 0 0
bbdd_clusters_6 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_StrokeI, i.ep_StrokeI) ~ clusters, data=data), main="Stroke survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_6$clusters))
## 1 2 3 4 5 6
## 310 279 438 490 466 231
bbdd_clusters_6$clusters <- as.factor(bbdd_clusters_6$clusters)
clusters_men <- bbdd_clusters_6 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 4) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_6 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 2)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_6_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_StrokeI, i.ep_StrokeI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_6_surv)
## [[1]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.07 0.0164 4.38 0.0000119 1.04 1.11
## 2 BMI 0.996 0.0373 -0.0995 0.921 0.926 1.07
## 3 clusters1 4.48 0.765 1.96 0.0498 1.00 20.0
## 4 clusters2 2.56 0.819 1.15 0.251 0.515 12.7
## 5 clusters3 2.24 0.765 1.06 0.290 0.502 10.0
## 6 clusters5 2.30 0.802 1.04 0.299 0.478 11.1
## 7 clusters6 3.24 0.914 1.29 0.198 0.541 19.4
##
## [[2]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0133 3.29 0.00101 1.02 1.07
## 2 BMI 0.956 0.0254 -1.79 0.0739 0.909 1.00
## 3 clusters1 1.77 0.518 1.10 0.270 0.642 4.89
## 4 clusters3 1.11 0.579 0.176 0.861 0.356 3.44
## 5 clusters4 1.01 0.495 0.0301 0.976 0.385 2.68
## 6 clusters5 1.57 0.467 0.969 0.333 0.630 3.92
## 7 clusters6 1.11 0.580 0.178 0.858 0.356 3.45
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_6_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 5 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_6_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
bbdd_clusters_6 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_TIA, i.ep_TIA) ~ clusters, data=data), main="TIA survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_6$clusters))
## 1 2 3 4 5 6
## 310 279 438 490 466 231
bbdd_clusters_6$clusters <- as.factor(bbdd_clusters_6$clusters)
clusters_men <- bbdd_clusters_6 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 5) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_6 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 6)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_6_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_TIA, i.ep_TIA)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_6_surv)
## [[1]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0218 2.13 0.0328 1.00 1.09
## 2 BMI 0.966 0.0538 -0.634 0.526 0.870 1.07
## 3 clusters1 4.45 1.12 1.33 0.182 0.497 39.9
## 4 clusters2 4.15 1.16 1.23 0.218 0.431 40.0
## 5 clusters3 4.62 1.06 1.44 0.149 0.578 37.0
## 6 clusters4 6.15 1.12 1.62 0.104 0.687 55.1
## 7 clusters6 3.09 1.42 0.797 0.426 0.193 49.6
##
## [[2]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0145 2.64 0.00825 1.01 1.07
## 2 BMI 0.975 0.0275 -0.916 0.360 0.924 1.03
## 3 clusters1 1.45 0.647 0.578 0.563 0.409 5.16
## 4 clusters2 1.34 0.648 0.454 0.650 0.377 4.78
## 5 clusters3 1.74 0.629 0.876 0.381 0.506 5.95
## 6 clusters4 1.61 0.563 0.848 0.397 0.534 4.86
## 7 clusters5 1.10 0.593 0.168 0.867 0.346 3.53
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_6_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,8) +
labs(x = "Hazard ratio")
## Warning: Removed 5 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_6_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 8) +
labs(x = "Hazard ratio")
bbdd_clusters_6 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_AMI, i.ep_AMI) ~ clusters, data=data), main="AMI survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_6$clusters))
## 1 2 3 4 5 6
## 310 279 438 490 466 231
bbdd_clusters_6$clusters <- as.factor(bbdd_clusters_6$clusters)
clusters_men <- bbdd_clusters_6 %>% filter(clusters != 6, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 3) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_6 %>% filter(clusters != 4, sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 6)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_6_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_AMI, i.ep_AMI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_6_surv)
## [[1]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0184 1.75 0.0809 0.996 1.07
## 2 BMI 0.942 0.0468 -1.27 0.203 0.860 1.03
## 3 clusters1 2.50 0.606 1.51 0.132 0.760 8.19
## 4 clusters2 4.90 0.549 2.90 0.00379 1.67 14.4
## 5 clusters4 3.31 0.607 1.97 0.0484 1.01 10.9
## 6 clusters5 1.09 0.731 0.112 0.911 0.259 4.55
## 7 clusters6 NA 0 NA NA NA NA
##
## [[2]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0258 1.71 0.0874 0.994 1.10
## 2 BMI 1.01 0.0439 0.251 0.802 0.928 1.10
## 3 clusters1 0.990 1.00 -0.0104 0.992 0.139 7.07
## 4 clusters2 2.31 0.842 0.992 0.321 0.443 12.0
## 5 clusters3 1.06 1.00 0.0594 0.953 0.148 7.60
## 6 clusters4 NA 0 NA NA NA NA
## 7 clusters5 1.29 0.819 0.308 0.758 0.258 6.41
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_6_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 4 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_6_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 5 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_point).
bbdd_clusters_6 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_Angor, i.ep_Angor) ~ clusters, data=data), main="Angor survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_6$clusters))
## 1 2 3 4 5 6
## 310 279 438 490 466 231
bbdd_clusters_6$clusters <- as.factor(bbdd_clusters_6$clusters)
clusters_men <- bbdd_clusters_6 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters,6) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_6 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 6)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_6_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_Angor, i.ep_Angor)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_6_surv)
## [[1]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0167 1.59 0.112 0.994 1.06
## 2 BMI 1.08 0.0344 2.30 0.0212 1.01 1.16
## 3 clusters1 1.71 1.12 0.479 0.632 0.189 15.5
## 4 clusters2 5.15 1.07 1.53 0.126 0.631 42.0
## 5 clusters3 1.94 1.07 0.621 0.535 0.240 15.7
## 6 clusters4 3.39 1.08 1.13 0.258 0.408 28.3
## 7 clusters5 2.07 1.10 0.662 0.508 0.240 17.8
##
## [[2]]
## # A tibble: 7 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.01 0.0189 0.781 0.435 0.978 1.05
## 2 BMI 0.970 0.0371 -0.817 0.414 0.902 1.04
## 3 clusters1 3.14 0.818 1.40 0.161 0.633 15.6
## 4 clusters2 2.02 0.870 0.810 0.418 0.368 11.1
## 5 clusters3 1.11 1.00 0.101 0.919 0.155 7.89
## 6 clusters4 1.39 0.817 0.402 0.687 0.280 6.90
## 7 clusters5 1.17 0.838 0.189 0.850 0.227 6.06
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_6_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 5 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_6_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 5 rows containing missing values (geom_segment).
bbdd_clusters_7 <- create_bbdd_clusters(gmm_7)
head(bbdd_clusters_7)
## rowId age BMI HbA1c Leukocytes Monocytes cLDL sex_female Tg cHDL DBP SBP
## 1 3785 61 19.27 6.5 6.6 8.8 123 Men 87 75 80 150
## 2 4678 67 24.71 7.7 9.5 5.4 83 Men 62 60 60 110
## 3 6049 56 34.32 7.9 7.3 5.6 110 Men 74 52 84 123
## 4 25754 70 30.11 10.2 8.1 7.0 220 Men 168 47 72 130
## 5 27214 71 24.91 5.7 3.5 7.1 122 Men 214 58 60 120
## 6 30046 71 33.96 6.5 6.6 11.5 107 Men 160 48 75 145
## Glucose TimeT2DM Ferritin clusters t.ep_StrokeI i.ep_StrokeI stroke t.ep_TIA
## 1 136 10.023272 100.7 2 4199 0 0 4199
## 2 227 8.996578 154.8 1 3246 0 0 3246
## 3 150 0.000000 297.9 5 481 0 0 481
## 4 278 0.000000 165.0 6 1411 0 0 1411
## 5 109 4.818617 279.1 1 4199 0 0 4199
## 6 105 0.000000 579.0 4 2352 0 0 2352
## i.ep_TIA tia t.ep_Angor i.ep_Angor angor t.ep_AMI i.ep_AMI ami
## 1 0 0 4199 0 0 4199 0 0
## 2 0 0 3246 0 0 3246 0 0
## 3 0 0 481 0 0 481 0 0
## 4 0 0 1411 0 0 1411 0 0
## 5 0 0 4199 0 0 4199 0 0
## 6 0 0 2352 0 0 2352 0 0
bbdd_clusters_7 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_StrokeI, i.ep_StrokeI) ~ clusters, data=data), main="Stroke survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_7$clusters))
## 1 2 3 4 5 6 7
## 341 277 350 355 446 213 232
bbdd_clusters_7$clusters <- as.factor(bbdd_clusters_7$clusters)
clusters_men <- bbdd_clusters_7 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 2) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_7 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 1)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_7_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_StrokeI, i.ep_StrokeI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_7_surv)
## [[1]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.08 0.0166 4.52 0.00000617 1.04 1.11
## 2 BMI 1.00 0.0375 0.0852 0.932 0.932 1.08
## 3 clusters1 2.19 0.691 1.13 0.257 0.565 8.49
## 4 clusters3 1.43 0.660 0.542 0.588 0.392 5.21
## 5 clusters4 1.37 0.818 0.387 0.699 0.276 6.82
## 6 clusters5 2.18 0.669 1.16 0.244 0.588 8.08
## 7 clusters6 2.54 0.768 1.22 0.224 0.565 11.4
## 8 clusters7 1.27 0.707 0.334 0.738 0.317 5.07
##
## [[2]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0133 3.35 0.000820 1.02 1.07
## 2 BMI 0.953 0.0259 -1.86 0.0634 0.906 1.00
## 3 clusters2 1.36 0.488 0.633 0.527 0.523 3.54
## 4 clusters3 2.94 0.476 2.26 0.0236 1.16 7.46
## 5 clusters4 0.948 0.487 -0.110 0.912 0.365 2.46
## 6 clusters5 1.11 0.452 0.241 0.810 0.460 2.70
## 7 clusters6 1.23 0.541 0.388 0.698 0.427 3.56
## 8 clusters7 1.67 0.540 0.944 0.345 0.577 4.80
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_7_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 4 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_7_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 1 rows containing missing values (geom_segment).
bbdd_clusters_7 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_TIA, i.ep_TIA) ~ clusters, data=data), main="TIA survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_7$clusters))
## 1 2 3 4 5 6 7
## 341 277 350 355 446 213 232
bbdd_clusters_7$clusters <- as.factor(bbdd_clusters_7$clusters)
clusters_men <- bbdd_clusters_7 %>% filter(clusters !=2, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 6) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_7 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 6)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_7_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_TIA, i.ep_TIA)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_7_surv)
## [[1]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0222 2.06 0.0394 1.00 1.09
## 2 BMI 0.967 0.0547 -0.612 0.540 0.869 1.08
## 3 clusters1 1.07 1.23 0.0580 0.954 0.0964 12.0
## 4 clusters2 NA 0 NA NA NA NA
## 5 clusters3 1.44 1.08 0.338 0.735 0.173 12.0
## 6 clusters4 1.47 1.23 0.314 0.754 0.133 16.2
## 7 clusters5 1.60 1.12 0.423 0.673 0.179 14.4
## 8 clusters7 2.28 1.09 0.759 0.448 0.271 19.2
##
## [[2]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0146 2.66 0.00784 1.01 1.07
## 2 BMI 0.975 0.0278 -0.904 0.366 0.923 1.03
## 3 clusters1 2.30 0.652 1.28 0.201 0.641 8.27
## 4 clusters2 2.02 0.678 1.04 0.298 0.536 7.65
## 5 clusters3 3.30 0.690 1.73 0.0835 0.854 12.8
## 6 clusters4 1.39 0.678 0.485 0.628 0.368 5.24
## 7 clusters5 1.34 0.667 0.436 0.663 0.362 4.94
## 8 clusters7 0.891 0.914 -0.126 0.900 0.149 5.34
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_7_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_7_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 3 rows containing missing values (geom_segment).
bbdd_clusters_7 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_AMI, i.ep_AMI) ~ clusters, data=data), main="AMI survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_7$clusters))
## 1 2 3 4 5 6 7
## 341 277 350 355 446 213 232
bbdd_clusters_7$clusters <- as.factor(bbdd_clusters_7$clusters)
clusters_men <- bbdd_clusters_7 %>% filter(clusters != 6, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 3) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_7 %>% filter(clusters !=4, sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 6)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_7_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_AMI, i.ep_AMI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_7_surv)
## [[1]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0183 1.79 0.0742 0.997 1.07
## 2 BMI 0.942 0.0458 -1.30 0.194 0.861 1.03
## 3 clusters1 3.24 0.766 1.54 0.124 0.724 14.5
## 4 clusters2 5.52 0.709 2.41 0.0160 1.38 22.1
## 5 clusters4 5.78 0.731 2.40 0.0164 1.38 24.2
## 6 clusters5 3.63 0.708 1.82 0.0685 0.907 14.5
## 7 clusters6 NA 0 NA NA NA NA
## 8 clusters7 3.50 0.712 1.76 0.0782 0.868 14.1
##
## [[2]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0260 1.81 0.0695 0.996 1.10
## 2 BMI 1.02 0.0456 0.339 0.734 0.929 1.11
## 3 clusters1 1.33 1.23 0.230 0.818 0.120 14.7
## 4 clusters2 3.74 1.10 1.20 0.229 0.436 32.2
## 5 clusters3 4.24 1.16 1.25 0.211 0.440 40.8
## 6 clusters4 NA 0 NA NA NA NA
## 7 clusters5 1.68 1.12 0.465 0.642 0.188 15.1
## 8 clusters7 2.83 1.23 0.848 0.396 0.256 31.3
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_7_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_7_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
## Removed 1 rows containing missing values (geom_point).
bbdd_clusters_7 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_Angor, i.ep_Angor) ~ clusters, data=data), main="Angor survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_7$clusters))
## 1 2 3 4 5 6 7
## 341 277 350 355 446 213 232
bbdd_clusters_7$clusters <- as.factor(bbdd_clusters_7$clusters)
clusters_men <- bbdd_clusters_7 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 1) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_7 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 4)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_7_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_Angor, i.ep_Angor)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_7_surv)
## [[1]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0167 1.72 0.0855 0.996 1.06
## 2 BMI 1.08 0.0344 2.14 0.0322 1.01 1.15
## 3 clusters2 3.32 0.818 1.47 0.142 0.668 16.5
## 4 clusters3 1.84 0.782 0.782 0.434 0.398 8.54
## 5 clusters4 2.40 0.869 1.01 0.313 0.437 13.2
## 6 clusters5 1.69 0.837 0.627 0.531 0.328 8.72
## 7 clusters6 1.50 1.00 0.406 0.685 0.210 10.7
## 8 clusters7 1.49 0.868 0.460 0.646 0.272 8.18
##
## [[2]]
## # A tibble: 8 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.02 0.0192 0.826 0.409 0.978 1.05
## 2 BMI 0.969 0.0376 -0.835 0.403 0.900 1.04
## 3 clusters1 6.11 1.10 1.65 0.0988 0.713 52.3
## 4 clusters2 5.87 1.12 1.58 0.114 0.655 52.5
## 5 clusters3 10.3 1.12 2.09 0.0368 1.15 92.6
## 6 clusters5 5.73 1.07 1.63 0.103 0.705 46.7
## 7 clusters6 1.82 1.41 0.422 0.673 0.114 29.1
## 8 clusters7 7.81 1.15 1.78 0.0751 0.812 75.2
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_7_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_7_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
## Warning: Removed 3 rows containing missing values (geom_point).
bbdd_clusters_8 <- create_bbdd_clusters(gmm_8)
head(bbdd_clusters_8)
## rowId age BMI HbA1c Leukocytes Monocytes cLDL sex_female Tg cHDL DBP SBP
## 1 3785 61 19.27 6.5 6.6 8.8 123 Men 87 75 80 150
## 2 4678 67 24.71 7.7 9.5 5.4 83 Men 62 60 60 110
## 3 6049 56 34.32 7.9 7.3 5.6 110 Men 74 52 84 123
## 4 25754 70 30.11 10.2 8.1 7.0 220 Men 168 47 72 130
## 5 27214 71 24.91 5.7 3.5 7.1 122 Men 214 58 60 120
## 6 30046 71 33.96 6.5 6.6 11.5 107 Men 160 48 75 145
## Glucose TimeT2DM Ferritin clusters t.ep_StrokeI i.ep_StrokeI stroke t.ep_TIA
## 1 136 10.023272 100.7 7 4199 0 0 4199
## 2 227 8.996578 154.8 4 3246 0 0 3246
## 3 150 0.000000 297.9 8 481 0 0 481
## 4 278 0.000000 165.0 6 1411 0 0 1411
## 5 109 4.818617 279.1 3 4199 0 0 4199
## 6 105 0.000000 579.0 4 2352 0 0 2352
## i.ep_TIA tia t.ep_Angor i.ep_Angor angor t.ep_AMI i.ep_AMI ami
## 1 0 0 4199 0 0 4199 0 0
## 2 0 0 3246 0 0 3246 0 0
## 3 0 0 481 0 0 481 0 0
## 4 0 0 1411 0 0 1411 0 0
## 5 0 0 4199 0 0 4199 0 0
## 6 0 0 2352 0 0 2352 0 0
bbdd_clusters_8 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_StrokeI, i.ep_StrokeI) ~ clusters, data=data), main="Stroke survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_8$clusters))
## 1 2 3 4 5 6 7 8
## 326 237 244 266 363 176 341 261
bbdd_clusters_8$clusters <- as.factor(bbdd_clusters_8$clusters)
clusters_men <- bbdd_clusters_8 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 4) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_8 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 4)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_8_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_StrokeI, i.ep_StrokeI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_8_surv)
## [[1]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.08 0.0166 4.44 0.00000900 1.04 1.11
## 2 BMI 1.00 0.0384 -0.00122 0.999 0.927 1.08
## 3 clusters1 4.59 1.10 1.39 0.165 0.534 39.5
## 4 clusters2 2.77 1.16 0.883 0.377 0.288 26.7
## 5 clusters3 4.67 1.05 1.46 0.144 0.591 36.9
## 6 clusters5 2.35 1.16 0.739 0.460 0.244 22.6
## 7 clusters6 4.72 1.16 1.34 0.179 0.490 45.5
## 8 clusters7 2.69 1.06 0.931 0.352 0.336 21.5
## 9 clusters8 5.52 1.05 1.63 0.104 0.706 43.1
##
## [[2]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0134 3.41 0.000660 1.02 1.07
## 2 BMI 0.950 0.0261 -1.95 0.0507 0.903 1.00
## 3 clusters1 1.53 0.613 0.692 0.489 0.460 5.08
## 4 clusters2 1.72 0.646 0.841 0.400 0.485 6.11
## 5 clusters3 5.05 0.613 2.64 0.00826 1.52 16.8
## 6 clusters5 2.10 0.572 1.30 0.194 0.685 6.46
## 7 clusters6 3.11 0.628 1.81 0.0708 0.908 10.7
## 8 clusters7 3.15 0.584 1.96 0.0496 1.00 9.88
## 9 clusters8 1.85 0.708 0.870 0.384 0.462 7.42
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_8_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 7 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_8_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
bbdd_clusters_8 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_TIA, i.ep_TIA) ~ clusters, data=data), main="TIA survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_8$clusters))
## 1 2 3 4 5 6 7 8
## 326 237 244 266 363 176 341 261
bbdd_clusters_8$clusters <- as.factor(bbdd_clusters_8$clusters)
clusters_men <- bbdd_clusters_8 %>% filter(clusters !=5, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, ref = 2) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_8 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 8)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_8_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_TIA, i.ep_TIA)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_8_surv)
## [[1]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0224 2.06 0.0398 1.00 1.09
## 2 BMI 0.964 0.0570 -0.647 0.517 0.862 1.08
## 3 clusters1 2.85 1.16 0.904 0.366 0.295 27.5
## 4 clusters3 3.19 1.08 1.07 0.283 0.383 26.6
## 5 clusters4 1.00 1.41 0.00289 0.998 0.0627 16.1
## 6 clusters5 NA 0 NA NA NA NA
## 7 clusters6 1.52 1.42 0.294 0.769 0.0945 24.4
## 8 clusters7 1.38 1.12 0.288 0.773 0.154 12.4
## 9 clusters8 2.77 1.10 0.929 0.353 0.323 23.7
##
## [[2]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0145 2.69 0.00716 1.01 1.07
## 2 BMI 0.973 0.0278 -0.978 0.328 0.922 1.03
## 3 clusters1 1.12 0.678 0.166 0.868 0.296 4.23
## 4 clusters2 1.27 0.710 0.335 0.738 0.316 5.10
## 5 clusters3 2.16 0.731 1.05 0.293 0.515 9.03
## 6 clusters4 1.31 0.691 0.395 0.693 0.339 5.09
## 7 clusters5 1.25 0.659 0.345 0.730 0.345 4.56
## 8 clusters6 1.22 0.764 0.255 0.799 0.272 5.43
## 9 clusters7 1.06 0.731 0.0790 0.937 0.253 4.44
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_8_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 7 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_8_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 1 rows containing missing values (geom_segment).
bbdd_clusters_8 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_AMI, i.ep_AMI) ~ clusters, data=data), main="AMI survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_8$clusters))
## 1 2 3 4 5 6 7 8
## 326 237 244 266 363 176 341 261
bbdd_clusters_8$clusters <- as.factor(bbdd_clusters_8$clusters)
clusters_men <- bbdd_clusters_8 %>% filter(clusters !=6, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 5) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_8 %>% filter(clusters !=8, clusters !=4, sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 5)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_8_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_AMI, i.ep_AMI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_8_surv)
## [[1]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0186 1.77 0.0764 0.997 1.07
## 2 BMI 0.943 0.0477 -1.23 0.217 0.859 1.04
## 3 clusters1 5.69 1.10 1.58 0.114 0.659 49.1
## 4 clusters2 6.40 1.10 1.69 0.0908 0.745 54.9
## 5 clusters3 2.48 1.12 0.813 0.416 0.277 22.3
## 6 clusters4 5.42 1.12 1.51 0.131 0.603 48.6
## 7 clusters6 NA 0 NA NA NA NA
## 8 clusters7 2.56 1.08 0.869 0.385 0.307 21.4
## 9 clusters8 3.68 1.10 1.19 0.235 0.429 31.6
##
## [[2]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0258 1.67 0.0948 0.993 1.10
## 2 BMI 1.01 0.0454 0.325 0.745 0.928 1.11
## 3 clusters1 3.82 1.16 1.16 0.247 0.395 36.9
## 4 clusters2 7.33 1.12 1.78 0.0752 0.817 65.8
## 5 clusters3 3.69 1.42 0.922 0.356 0.230 59.1
## 6 clusters4 NA 0 NA NA NA NA
## 7 clusters6 7.75 1.16 1.77 0.0763 0.805 74.6
## 8 clusters7 8.98 1.10 2.00 0.0452 1.05 77.0
## 9 clusters8 NA 0 NA NA NA NA
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_8_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,10) +
labs(x = "Hazard ratio")
## Warning: Removed 7 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_8_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 10) +
labs(x = "Hazard ratio")
## Warning: Removed 7 rows containing missing values (geom_segment).
## Warning: Removed 2 rows containing missing values (geom_point).
bbdd_clusters_8 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_Angor, i.ep_Angor) ~ clusters, data=data), main="Angor survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_8$clusters))
## 1 2 3 4 5 6 7 8
## 326 237 244 266 363 176 341 261
bbdd_clusters_8$clusters <- as.factor(bbdd_clusters_8$clusters)
clusters_men <- bbdd_clusters_8 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 6) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_8 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 4)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_8_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_Angor, i.ep_Angor)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_8_surv)
## [[1]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0164 1.71 0.0868 0.996 1.06
## 2 BMI 1.08 0.0351 2.16 0.0307 1.01 1.16
## 3 clusters1 1.76 1.23 0.458 0.647 0.157 19.7
## 4 clusters2 4.86 1.09 1.46 0.146 0.578 40.8
## 5 clusters3 2.42 1.08 0.817 0.414 0.290 20.3
## 6 clusters4 1.58 1.23 0.374 0.708 0.143 17.5
## 7 clusters5 3.09 1.10 1.03 0.303 0.361 26.5
## 8 clusters7 1.79 1.09 0.537 0.591 0.213 15.0
## 9 clusters8 1.75 1.12 0.497 0.619 0.193 15.8
##
## [[2]]
## # A tibble: 9 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.02 0.0191 0.856 0.392 0.979 1.06
## 2 BMI 0.968 0.0378 -0.862 0.388 0.899 1.04
## 3 clusters1 4.61 1.08 1.41 0.157 0.554 38.3
## 4 clusters2 3.51 1.16 1.09 0.277 0.365 33.8
## 5 clusters3 6.69 1.16 1.65 0.0999 0.695 64.4
## 6 clusters5 3.82 1.08 1.24 0.215 0.459 31.8
## 7 clusters6 4.67 1.16 1.33 0.183 0.484 44.9
## 8 clusters7 2.23 1.23 0.653 0.514 0.202 24.6
## 9 clusters8 1.65 1.42 0.355 0.723 0.103 26.5
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_8_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,10) +
labs(x = "Hazard ratio")
## Warning: Removed 7 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_8_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 10) +
labs(x = "Hazard ratio")
## Warning: Removed 7 rows containing missing values (geom_segment).
bbdd_clusters_9 <- create_bbdd_clusters(gmm_9)
head(bbdd_clusters_9)
## rowId age BMI HbA1c Leukocytes Monocytes cLDL sex_female Tg cHDL DBP SBP
## 1 3785 61 19.27 6.5 6.6 8.8 123 Men 87 75 80 150
## 2 4678 67 24.71 7.7 9.5 5.4 83 Men 62 60 60 110
## 3 6049 56 34.32 7.9 7.3 5.6 110 Men 74 52 84 123
## 4 25754 70 30.11 10.2 8.1 7.0 220 Men 168 47 72 130
## 5 27214 71 24.91 5.7 3.5 7.1 122 Men 214 58 60 120
## 6 30046 71 33.96 6.5 6.6 11.5 107 Men 160 48 75 145
## Glucose TimeT2DM Ferritin clusters t.ep_StrokeI i.ep_StrokeI stroke t.ep_TIA
## 1 136 10.023272 100.7 7 4199 0 0 4199
## 2 227 8.996578 154.8 1 3246 0 0 3246
## 3 150 0.000000 297.9 8 481 0 0 481
## 4 278 0.000000 165.0 6 1411 0 0 1411
## 5 109 4.818617 279.1 5 4199 0 0 4199
## 6 105 0.000000 579.0 4 2352 0 0 2352
## i.ep_TIA tia t.ep_Angor i.ep_Angor angor t.ep_AMI i.ep_AMI ami
## 1 0 0 4199 0 0 4199 0 0
## 2 0 0 3246 0 0 3246 0 0
## 3 0 0 481 0 0 481 0 0
## 4 0 0 1411 0 0 1411 0 0
## 5 0 0 4199 0 0 4199 0 0
## 6 0 0 2352 0 0 2352 0 0
bbdd_clusters_9 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_StrokeI, i.ep_StrokeI) ~ clusters, data=data), main="Stroke survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_9$clusters))
## 1 2 3 4 5 6 7 8 9
## 191 215 224 356 310 163 334 222 199
bbdd_clusters_9$clusters <- as.factor(bbdd_clusters_9$clusters)
clusters_men <- bbdd_clusters_9 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 9) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_9 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 1)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_9_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_StrokeI, i.ep_StrokeI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_9_surv)
## [[1]]
## # A tibble: 10 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.08 0.0166 4.41 0.0000104 1.04 1.11
## 2 BMI 0.995 0.0374 -0.126 0.900 0.925 1.07
## 3 clusters1 3.74 1.10 1.20 0.229 0.436 32.1
## 4 clusters2 3.69 1.16 1.13 0.259 0.383 35.5
## 5 clusters3 3.59 1.08 1.18 0.237 0.431 29.9
## 6 clusters4 4.11 1.08 1.31 0.191 0.494 34.2
## 7 clusters5 2.60 1.23 0.777 0.437 0.234 28.8
## 8 clusters6 7.18 1.12 1.76 0.0781 0.801 64.4
## 9 clusters7 2.89 1.07 0.991 0.322 0.355 23.5
## 10 clusters8 6.14 1.06 1.71 0.0875 0.766 49.3
##
## [[2]]
## # A tibble: 10 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0136 3.37 0.000762 1.02 1.08
## 2 BMI 0.953 0.0254 -1.92 0.0553 0.906 1.00
## 3 clusters2 5.71 1.07 1.63 0.103 0.701 46.4
## 4 clusters3 11.1 1.07 2.25 0.0243 1.37 90.7
## 5 clusters4 3.56 1.07 1.19 0.235 0.438 29.0
## 6 clusters5 4.12 1.06 1.34 0.179 0.521 32.7
## 7 clusters6 7.40 1.08 1.85 0.0642 0.889 61.6
## 8 clusters7 8.54 1.04 2.05 0.0401 1.10 66.2
## 9 clusters8 5.67 1.12 1.55 0.122 0.629 51.1
## 10 clusters9 8.22 1.05 2.00 0.0457 1.04 65.0
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_9_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 8 rows containing missing values (geom_segment).
## Warning: Removed 2 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_9_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 8 rows containing missing values (geom_segment).
## Warning: Removed 4 rows containing missing values (geom_point).
bbdd_clusters_9 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_TIA, i.ep_TIA) ~ clusters, data=data), main="TIA survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_9$clusters))
## 1 2 3 4 5 6 7 8 9
## 191 215 224 356 310 163 334 222 199
bbdd_clusters_9$clusters <- as.factor(bbdd_clusters_9$clusters)
clusters_men <- bbdd_clusters_9 %>% filter(clusters !=6, clusters !=5, clusters !=2, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 8) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_9 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 8)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_9_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_TIA, i.ep_TIA)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_9_surv)
## [[1]]
## # A tibble: 10 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0223 1.78 0.0757 0.996 1.09
## 2 BMI 0.973 0.0541 -0.504 0.614 0.875 1.08
## 3 clusters1 2.32 0.872 0.965 0.335 0.420 12.8
## 4 clusters2 NA 0 NA NA NA NA
## 5 clusters3 1.21 0.915 0.209 0.834 0.202 7.27
## 6 clusters4 2.85 0.818 1.28 0.200 0.574 14.2
## 7 clusters5 NA 0 NA NA NA NA
## 8 clusters6 NA 0 NA NA NA NA
## 9 clusters7 1.19 0.868 0.196 0.845 0.216 6.50
## 10 clusters9 1.33 1.00 0.282 0.778 0.186 9.47
##
## [[2]]
## # A tibble: 10 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0146 2.56 0.0105 1.01 1.07
## 2 BMI 0.975 0.0278 -0.895 0.371 0.924 1.03
## 3 clusters1 3.10 1.12 1.01 0.313 0.344 27.9
## 4 clusters2 3.70 1.08 1.21 0.227 0.443 30.9
## 5 clusters3 6.92 1.08 1.79 0.0734 0.832 57.6
## 6 clusters4 3.47 1.06 1.18 0.239 0.438 27.5
## 7 clusters5 2.51 1.07 0.859 0.391 0.308 20.4
## 8 clusters6 3.51 1.12 1.12 0.262 0.392 31.5
## 9 clusters7 3.40 1.08 1.13 0.258 0.407 28.3
## 10 clusters9 3.41 1.10 1.12 0.263 0.397 29.3
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_9_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,8) +
labs(x = "Hazard ratio")
## Warning: Removed 6 rows containing missing values (geom_segment).
## Warning: Removed 3 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_9_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 8) +
labs(x = "Hazard ratio")
## Warning: Removed 8 rows containing missing values (geom_segment).
bbdd_clusters_9 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_AMI, i.ep_AMI) ~ clusters, data=data), main="AMI survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_9$clusters))
## 1 2 3 4 5 6 7 8 9
## 191 215 224 356 310 163 334 222 199
bbdd_clusters_9$clusters <- as.factor(bbdd_clusters_9$clusters)
clusters_men <- bbdd_clusters_9 %>% filter(clusters !=5, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 6) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_9 %>% filter(clusters !=8, clusters !=4, clusters !=1, sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 5)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_9_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_AMI, i.ep_AMI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_9_surv)
## [[1]]
## # A tibble: 10 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0188 1.80 0.0726 0.997 1.07
## 2 BMI 0.938 0.0474 -1.36 0.174 0.854 1.03
## 3 clusters1 2.10 1.13 0.658 0.511 0.231 19.1
## 4 clusters2 3.23 1.12 1.05 0.295 0.360 29.0
## 5 clusters3 1.80 1.10 0.535 0.593 0.210 15.4
## 6 clusters4 2.24 1.10 0.734 0.463 0.260 19.2
## 7 clusters5 NA 0 NA NA NA NA
## 8 clusters7 1.07 1.12 0.0575 0.954 0.118 9.60
## 9 clusters8 2.48 1.10 0.828 0.407 0.289 21.4
## 10 clusters9 1.25 1.23 0.180 0.857 0.113 13.8
##
## [[2]]
## # A tibble: 10 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0265 1.73 0.0844 0.994 1.10
## 2 BMI 1.01 0.0443 0.304 0.761 0.929 1.11
## 3 clusters1 NA 0 NA NA NA NA
## 4 clusters2 3.78 0.869 1.53 0.125 0.690 20.8
## 5 clusters3 3.45 1.00 1.24 0.217 0.484 24.6
## 6 clusters4 NA 0 NA NA NA NA
## 7 clusters6 2.66 1.00 0.977 0.329 0.374 18.9
## 8 clusters7 2.56 0.915 1.03 0.305 0.426 15.4
## 9 clusters8 NA 0 NA NA NA NA
## 10 clusters9 4.00 0.867 1.60 0.110 0.732 21.9
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_9_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 8 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_9_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 8 rows containing missing values (geom_segment).
## Warning: Removed 3 rows containing missing values (geom_point).
bbdd_clusters_9 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_Angor, i.ep_Angor) ~ clusters, data=data), main="Angor survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_9$clusters))
## 1 2 3 4 5 6 7 8 9
## 191 215 224 356 310 163 334 222 199
bbdd_clusters_9$clusters <- as.factor(bbdd_clusters_9$clusters)
clusters_men <- bbdd_clusters_9 %>% filter(sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 9) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_9 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 4)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_9_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_Angor, i.ep_Angor)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_9_surv)
## [[1]]
## # A tibble: 10 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0168 1.92 0.0548 0.999 1.07
## 2 BMI 1.08 0.0347 2.15 0.0317 1.01 1.15
## 3 clusters1 2.83 1.16 0.898 0.369 0.292 27.5
## 4 clusters2 6.03 1.10 1.64 0.101 0.703 51.7
## 5 clusters3 5.13 1.05 1.55 0.121 0.649 40.5
## 6 clusters4 2.69 1.12 0.884 0.377 0.300 24.1
## 7 clusters5 3.36 1.16 1.05 0.294 0.349 32.4
## 8 clusters6 2.79 1.23 0.836 0.403 0.251 31.0
## 9 clusters7 1.33 1.16 0.248 0.804 0.138 12.8
## 10 clusters8 1.36 1.23 0.251 0.802 0.123 15.1
##
## [[2]]
## # A tibble: 10 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.01 0.0191 0.730 0.465 0.977 1.05
## 2 BMI 0.967 0.0375 -0.897 0.370 0.898 1.04
## 3 clusters1 6.25 1.16 1.59 0.113 0.649 60.1
## 4 clusters2 6.53 1.12 1.68 0.0935 0.729 58.4
## 5 clusters3 2.80 1.41 0.727 0.467 0.175 44.7
## 6 clusters5 6.17 1.07 1.70 0.0890 0.758 50.3
## 7 clusters6 4.28 1.23 1.19 0.235 0.388 47.3
## 8 clusters7 4.40 1.15 1.28 0.200 0.457 42.3
## 9 clusters8 2.32 1.42 0.595 0.552 0.145 37.3
## 10 clusters9 5.25 1.15 1.44 0.151 0.546 50.4
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_9_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,8) +
labs(x = "Hazard ratio")
## Warning: Removed 8 rows containing missing values (geom_segment).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_9_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 8) +
labs(x = "Hazard ratio")
## Warning: Removed 8 rows containing missing values (geom_segment).
bbdd_clusters_10 <- create_bbdd_clusters(gmm10)
head(bbdd_clusters_10)
## rowId age BMI HbA1c Leukocytes Monocytes cLDL sex_female Tg cHDL DBP SBP
## 1 3785 61 19.27 6.5 6.6 8.8 123 Men 87 75 80 150
## 2 4678 67 24.71 7.7 9.5 5.4 83 Men 62 60 60 110
## 3 6049 56 34.32 7.9 7.3 5.6 110 Men 74 52 84 123
## 4 25754 70 30.11 10.2 8.1 7.0 220 Men 168 47 72 130
## 5 27214 71 24.91 5.7 3.5 7.1 122 Men 214 58 60 120
## 6 30046 71 33.96 6.5 6.6 11.5 107 Men 160 48 75 145
## Glucose TimeT2DM Ferritin clusters t.ep_StrokeI i.ep_StrokeI stroke t.ep_TIA
## 1 136 10.023272 100.7 7 4199 0 0 4199
## 2 227 8.996578 154.8 9 3246 0 0 3246
## 3 150 0.000000 297.9 8 481 0 0 481
## 4 278 0.000000 165.0 6 1411 0 0 1411
## 5 109 4.818617 279.1 4 4199 0 0 4199
## 6 105 0.000000 579.0 4 2352 0 0 2352
## i.ep_TIA tia t.ep_Angor i.ep_Angor angor t.ep_AMI i.ep_AMI ami
## 1 0 0 4199 0 0 4199 0 0
## 2 0 0 3246 0 0 3246 0 0
## 3 0 0 481 0 0 481 0 0
## 4 0 0 1411 0 0 1411 0 0
## 5 0 0 4199 0 0 4199 0 0
## 6 0 0 2352 0 0 2352 0 0
bbdd_clusters_10 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_StrokeI, i.ep_StrokeI) ~ clusters, data=data), main="Stroke survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_10$clusters))
## 1 2 3 4 5 6 7 8 9 10
## 199 198 249 286 299 211 260 209 140 163
bbdd_clusters_10$clusters <- as.factor(bbdd_clusters_10$clusters)
clusters_men <- bbdd_clusters_10 %>% filter(clusters != 9, clusters !=5, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 2) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_10 %>% filter(clusters !=1, sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 4)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_10_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_StrokeI, i.ep_StrokeI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_10_surv)
## [[1]]
## # A tibble: 11 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.07 0.0166 4.18 0.0000293 1.04 1.11
## 2 BMI 0.999 0.0373 -0.0385 0.969 0.928 1.07
## 3 clusters1 1.26 0.838 0.271 0.786 0.243 6.49
## 4 clusters3 1.18 0.791 0.213 0.831 0.251 5.58
## 5 clusters4 1.40 0.803 0.417 0.677 0.290 6.74
## 6 clusters5 NA 0 NA NA NA NA
## 7 clusters6 2.08 0.868 0.842 0.400 0.379 11.4
## 8 clusters7 0.705 0.913 -0.382 0.702 0.118 4.22
## 9 clusters8 2.13 0.792 0.954 0.340 0.451 10.0
## 10 clusters9 NA 0 NA NA NA NA
## 11 clusters10 2.03 0.837 0.844 0.399 0.393 10.4
##
## [[2]]
## # A tibble: 11 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0136 3.27 0.00108 1.02 1.07
## 2 BMI 0.951 0.0264 -1.89 0.0585 0.903 1.00
## 3 clusters1 NA 0 NA NA NA NA
## 4 clusters2 2.13 0.708 1.07 0.286 0.532 8.52
## 5 clusters3 6.38 0.667 2.78 0.00545 1.73 23.6
## 6 clusters5 2.03 0.659 1.07 0.284 0.557 7.37
## 7 clusters6 3.33 0.646 1.86 0.0626 0.939 11.8
## 8 clusters7 2.57 0.677 1.40 0.163 0.683 9.70
## 9 clusters8 2.28 0.765 1.08 0.280 0.510 10.2
## 10 clusters9 2.51 0.764 1.20 0.229 0.561 11.2
## 11 clusters10 2.56 0.731 1.29 0.198 0.612 10.8
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_10_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,4) +
labs(x = "Hazard ratio")
## Warning: Removed 9 rows containing missing values (geom_segment).
## Warning: Removed 2 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_10_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 4) +
labs(x = "Hazard ratio")
## Warning: Removed 9 rows containing missing values (geom_segment).
## Removed 2 rows containing missing values (geom_point).
bbdd_clusters_10 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_TIA, i.ep_TIA) ~ clusters, data=data), main="TIA survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_10$clusters))
## 1 2 3 4 5 6 7 8 9 10
## 199 198 249 286 299 211 260 209 140 163
bbdd_clusters_10$clusters <- as.factor(bbdd_clusters_10$clusters)
clusters_men <- bbdd_clusters_10 %>% filter(clusters != 6, clusters != 5,
clusters != 2,sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 9) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_10 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 8)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_10_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_TIA, i.ep_TIA)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_10_surv)
## [[1]]
## # A tibble: 11 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0220 1.85 0.0641 0.998 1.09
## 2 BMI 0.967 0.0551 -0.604 0.546 0.868 1.08
## 3 clusters1 1.28 1.23 0.199 0.842 0.114 14.3
## 4 clusters2 NA 0 NA NA NA NA
## 5 clusters3 1.71 1.10 0.488 0.625 0.199 14.7
## 6 clusters4 2.86 1.09 0.966 0.334 0.339 24.1
## 7 clusters5 NA 0 NA NA NA NA
## 8 clusters6 NA 0 NA NA NA NA
## 9 clusters7 1.10 1.23 0.0771 0.939 0.0992 12.2
## 10 clusters8 1.15 1.23 0.117 0.907 0.104 12.9
## 11 clusters10 2.76 1.16 0.877 0.381 0.285 26.7
##
## [[2]]
## # A tibble: 11 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.04 0.0146 2.62 0.00872 1.01 1.07
## 2 BMI 0.974 0.0280 -0.948 0.343 0.922 1.03
## 3 clusters1 2.26 1.16 0.704 0.482 0.234 21.8
## 4 clusters2 3.23 1.10 1.07 0.286 0.375 27.7
## 5 clusters3 7.43 1.08 1.86 0.0635 0.893 61.8
## 6 clusters4 3.69 1.08 1.21 0.227 0.444 30.7
## 7 clusters5 3.13 1.06 1.08 0.282 0.391 25.0
## 8 clusters6 3.11 1.08 1.05 0.295 0.372 26.0
## 9 clusters7 4.16 1.07 1.33 0.182 0.512 33.9
## 10 clusters9 2.25 1.23 0.663 0.507 0.204 24.9
## 11 clusters10 3.63 1.12 1.15 0.249 0.405 32.5
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_10_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 9 rows containing missing values (geom_segment).
## Warning: Removed 3 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_10_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 9 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_point).
bbdd_clusters_10 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_AMI, i.ep_AMI) ~ clusters, data=data), main="AMI survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_10$clusters))
## 1 2 3 4 5 6 7 8 9 10
## 199 198 249 286 299 211 260 209 140 163
bbdd_clusters_10$clusters <- as.factor(bbdd_clusters_10$clusters)
clusters_men <- bbdd_clusters_10 %>% filter(clusters != 5, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 10) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_10 %>% filter(clusters !=8,clusters != 1, clusters != 4, sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 5)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_10_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_AMI, i.ep_AMI)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_10_surv)
## [[1]]
## # A tibble: 11 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0189 1.54 0.123 0.992 1.07
## 2 BMI 0.944 0.0470 -1.23 0.219 0.861 1.03
## 3 clusters1 4.27 1.08 1.34 0.180 0.511 35.7
## 4 clusters2 2.48 1.22 0.740 0.459 0.225 27.3
## 5 clusters3 1.70 1.10 0.486 0.627 0.199 14.6
## 6 clusters4 2.69 1.10 0.902 0.367 0.314 23.1
## 7 clusters5 NA 0 NA NA NA NA
## 8 clusters6 2.31 1.23 0.684 0.494 0.209 25.6
## 9 clusters7 1.77 1.16 0.494 0.621 0.184 17.1
## 10 clusters8 3.37 1.10 1.11 0.268 0.393 28.9
## 11 clusters9 1.04 1.42 0.0270 0.978 0.0646 16.7
##
## [[2]]
## # A tibble: 11 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.05 0.0259 1.75 0.0795 0.995 1.10
## 2 BMI 1.01 0.0446 0.206 0.837 0.925 1.10
## 3 clusters1 NA 0 NA NA NA NA
## 4 clusters2 7.19 1.12 1.76 0.0782 0.800 64.7
## 5 clusters3 3.33 1.42 0.850 0.395 0.208 53.4
## 6 clusters4 NA 0 NA NA NA NA
## 7 clusters6 2.82 1.23 0.843 0.399 0.253 31.3
## 8 clusters7 4.76 1.16 1.35 0.177 0.495 45.8
## 9 clusters8 NA 0 NA NA NA NA
## 10 clusters9 9.28 1.15 1.93 0.0538 0.964 89.2
## 11 clusters10 7.29 1.16 1.72 0.0854 0.758 70.2
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_10_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 9 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_10_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 9 rows containing missing values (geom_segment).
## Warning: Removed 6 rows containing missing values (geom_point).
bbdd_clusters_10 %>% group_by(sex_female) %>% group_split %>% map(function(data){
autoplot(survfit(Surv(t.ep_Angor, i.ep_Angor) ~ clusters, data=data), main="Angor survival", censor.size = 0, surv.geom = "line",
conf.int.fill=NULL)
})
## [[1]]
##
## [[2]]
summary(as.factor(bbdd_clusters_10$clusters))
## 1 2 3 4 5 6 7 8 9 10
## 199 198 249 286 299 211 260 209 140 163
bbdd_clusters_10$clusters <- as.factor(bbdd_clusters_10$clusters)
clusters_men <- bbdd_clusters_10 %>% filter(clusters != 9, sex_female=="Men") #FILTRO CUANDO NO APARECE EVENTO
clusters_men$clusters <- relevel(clusters_men$clusters, 7) #REFERENCIA MAYOR SUPERVIVENCIA
clusters_women <- bbdd_clusters_10 %>% filter(sex_female=="Women")
clusters_women$clusters <- relevel(clusters_women$clusters, ref = 8)
clusters = list(clusters_men, clusters_women)
bbdd_clusters_10_surv <- clusters %>%
map(function(data) {mod = coxph(Surv(t.ep_Angor, i.ep_Angor)~age + BMI+ clusters, data=data)
tidy(mod, conf.int= TRUE, exponentiate=TRUE)})
head(bbdd_clusters_10_surv)
## [[1]]
## # A tibble: 11 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.03 0.0170 1.57 0.116 0.993 1.06
## 2 BMI 1.08 0.0350 2.16 0.0305 1.01 1.16
## 3 clusters1 6.76 1.10 1.74 0.0820 0.785 58.2
## 4 clusters2 6.19 1.16 1.58 0.114 0.644 59.6
## 5 clusters3 4.36 1.07 1.38 0.169 0.535 35.5
## 6 clusters4 3.23 1.12 1.05 0.294 0.361 29.0
## 7 clusters5 4.44 1.16 1.29 0.197 0.460 42.9
## 8 clusters6 3.40 1.23 0.997 0.319 0.306 37.8
## 9 clusters8 1.97 1.23 0.554 0.579 0.179 21.8
## 10 clusters9 NA 0 NA NA NA NA
## 11 clusters10 8.23 1.10 1.92 0.0544 0.961 70.5
##
## [[2]]
## # A tibble: 11 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 age 1.02 0.0192 0.846 0.398 0.979 1.06
## 2 BMI 0.968 0.0370 -0.865 0.387 0.901 1.04
## 3 clusters1 3.42 1.12 1.10 0.273 0.379 30.8
## 4 clusters2 2.16 1.16 0.664 0.506 0.223 20.9
## 5 clusters3 1.28 1.42 0.173 0.862 0.0798 20.5
## 6 clusters4 0.663 1.42 -0.291 0.771 0.0414 10.6
## 7 clusters5 1.98 1.10 0.625 0.532 0.232 17.0
## 8 clusters6 0.570 1.42 -0.396 0.692 0.0354 9.18
## 9 clusters7 2.51 1.12 0.823 0.411 0.280 22.6
## 10 clusters9 2.38 1.23 0.708 0.479 0.216 26.3
## 11 clusters10 2.80 1.15 0.890 0.373 0.291 26.9
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_10_surv[[1]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1,6) +
labs(x = "Hazard ratio")
## Warning: Removed 9 rows containing missing values (geom_segment).
## Warning: Removed 4 rows containing missing values (geom_point).
options(repr.plot.width = 9, repr.plot.height = 3)
bbdd_clusters_10_surv[[2]] %>%
ggplot(aes(estimate, term)) +
geom_vline(xintercept = 1, lty = "dashed") +
geom_linerange(aes(xmin = conf.low, xmax = conf.high)) +
geom_point(aes(color = term)) +
theme_bw() +
theme(legend.position = "none") +
xlim(-1, 6) +
labs(x = "Hazard ratio")
## Warning: Removed 9 rows containing missing values (geom_segment).
bbdd_clusters_6 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map(function(dat){
boxplot(dat$BMI~dat$clusters,col=viridis(8), main=paste("BMI DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 19.02 20.32 19.270 20.440 20.55 22.130
## [2,] 26.81 25.93 26.500 27.115 26.40 27.380
## [3,] 29.77 28.74 28.795 29.550 28.45 29.620
## [4,] 33.25 31.43 31.400 32.715 31.91 32.915
## [5,] 41.80 38.40 38.600 40.520 40.16 38.970
##
## [[1]]$n
## [1] 158 101 282 123 166 67
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 28.9605 27.87531 28.33397 28.7522 27.7743 28.55159
## [2,] 30.5795 29.60469 29.25603 30.3478 29.1257 30.68841
##
## [[1]]$out
## [1] 43.25 40.64 40.04 41.08 44.69 49.96 39.68 42.31 40.12 42.72 53.52 41.27
## [13] 16.31 42.61 42.07 41.87 51.53 45.88 49.45 41.74
##
## [[1]]$group
## [1] 1 2 2 3 3 3 3 3 3 4 4 4 4 5 5 5 5 6 6 6
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 19.740 19.560 19.010 16.75 19.110 18.270
## [2,] 27.225 27.430 26.910 27.09 27.080 27.590
## [3,] 30.935 31.385 30.900 30.80 30.330 32.060
## [4,] 34.010 35.430 34.855 35.54 35.515 36.345
## [5,] 43.420 47.070 45.920 47.87 47.250 49.050
##
## [[2]]$n
## [1] 152 178 156 367 300 164
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 30.06547 30.43759 29.89495 30.10308 29.56055 30.97983
## [2,] 31.80453 32.33241 31.90505 31.49692 31.09945 33.14017
##
## [[2]]$out
## [1] 56.14 14.00 51.86 44.38 51.02 49.10 44.60 49.34 49.95 51.93 52.34 51.47
## [13] 49.49 50.47 49.73 50.94 53.95 48.91 52.27 59.14 54.65 51.86
##
## [[2]]$group
## [1] 1 1 1 1 1 1 1 2 2 3 3 4 4 4 4 4 4 4 5 5 6 6
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6"
bbdd_clusters_6 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map2(bbdd_covar %>% group_by(sex_female) %>% group_split,
function(dat, weight){
dat <- dat %>% left_join(weight %>% select(weight, rowId), by="rowId")
boxplot(dat$weight~dat$clusters,col=viridis(8), main=paste("WEIGTH DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 57 52.0 51.2 44.4 52.0 61.50
## [2,] 75 70.8 72.0 73.9 73.0 74.75
## [3,] 85 80.0 81.0 83.0 80.0 84.90
## [4,] 97 89.0 89.5 94.0 90.9 95.95
## [5,] 125 113.0 115.2 122.1 112.7 116.40
##
## [[1]]$n
## [1] 158 101 282 123 166 67
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 82.23464 77.13867 79.35347 80.13648 77.80489 80.80781
## [2,] 87.76536 82.86133 82.64653 85.86352 82.19511 88.99219
##
## [[1]]$out
## [1] 130.0 140.0 118.2 153.0 118.0 126.3 156.5 125.0 128.0 132.0 121.3 133.3
## [13] 121.0 156.0 123.0 138.9 133.0
##
## [[1]]$group
## [1] 2 3 3 3 3 4 4 4 5 5 5 5 5 5 5 6 6
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 41.50 44.6 44.50 38.30 38.50 35.80
## [2,] 64.00 64.2 63.10 63.85 64.00 65.35
## [3,] 74.00 71.5 72.75 72.80 71.50 76.80
## [4,] 82.75 87.5 85.00 84.25 84.05 87.35
## [5,] 109.00 122.0 112.50 113.00 114.00 116.90
##
## [[2]]$n
## [1] 152 178 156 367 300 164
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 71.5971 68.74067 69.97962 71.1175 69.67101 74.0857
## [2,] 76.4029 74.25933 75.52038 74.4825 73.32899 79.5143
##
## [[2]]$out
## [1] 128.0 31.5 123.0 130.6 126.3 128.0 134.0 126.5 128.5 122.0 126.0 115.0
## [13] 156.0 124.3 115.0 114.5 121.5 121.0 119.0 161.0 121.0 128.6 150.6 122.0
##
## [[2]]$group
## [1] 1 1 1 1 2 3 3 4 4 4 4 4 4 4 4 5 5 5 5 5 5 6 6 6
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6"
bbdd_clusters_7 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map(function(dat){
boxplot(dat$BMI~dat$clusters,col=viridis(8), main=paste("BMI DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 20.550 19.27 20.060 20.44 19.02 22.13 21.160
## [2,] 25.970 26.34 26.765 27.09 26.81 27.28 25.930
## [3,] 27.895 28.75 29.380 29.58 29.52 29.62 28.385
## [4,] 30.860 31.91 31.595 33.13 32.60 32.35 32.100
## [5,] 37.550 40.14 38.600 41.27 40.37 38.97 40.120
##
## [[1]]$n
## [1] 98 90 247 93 162 69 138
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 27.11454 27.82234 28.89443 28.59042 28.80125 28.65564 27.55514
## [2,] 28.67546 29.67766 29.86557 30.56958 30.23875 30.58436 29.21486
##
## [[1]]$out
## [1] 42.07 41.87 38.63 51.53 40.16 40.64 42.61 41.08 44.69 49.96 39.68 42.72
## [13] 53.52 16.31 43.25 41.80 45.88 49.45 41.74 42.31
##
## [[1]]$group
## [1] 1 1 1 1 1 2 3 3 3 3 3 4 4 4 5 5 6 6 6 7
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6" "7"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 19.720 19.56 19.110 17.970 16.750 18.270 19.010
## [2,] 26.705 27.41 28.055 26.860 27.595 27.685 27.040
## [3,] 30.850 31.26 31.400 30.235 30.800 31.945 31.105
## [4,] 34.705 34.96 36.235 34.550 36.255 35.840 34.720
## [5,] 45.920 46.09 44.600 45.610 47.250 45.700 44.220
##
## [[2]]$n
## [1] 243 187 103 262 284 144 94
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 30.03914 30.38767 30.12652 29.48436 29.98808 30.87126 29.85343
## [2,] 31.66086 32.13233 32.67348 30.98564 31.61192 33.01874 32.35657
##
## [[2]]$out
## [1] 51.47 14.00 51.86 51.93 50.94 52.34 47.36 51.02 49.10 48.91 49.34 56.14
## [13] 47.07 49.95 46.66 46.66 49.73 47.87 53.95 52.27 49.49 50.47 59.14 54.65
## [25] 49.05 51.86 48.87 46.80
##
## [[2]]$group
## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 4 4 4 4 5 5 5 5 6 6 6 6 7
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6" "7"
bbdd_clusters_7 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map2(bbdd_covar %>% group_by(sex_female) %>% group_split,
function(dat, weight){
dat <- dat %>% left_join(weight %>% select(weight, rowId), by="rowId")
boxplot(dat$weight~dat$clusters,col=viridis(8), main=paste("WEIGTH DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 52.00 51.2 52.00 44.4 61.1 59.2 54.0
## [2,] 70.00 71.7 74.65 75.3 75.0 74.5 70.8
## [3,] 77.45 79.9 82.00 84.1 83.9 84.0 81.0
## [4,] 88.70 91.0 90.00 97.0 94.5 95.5 90.0
## [5,] 112.70 116.0 112.00 126.3 121.3 116.4 118.0
##
## [[1]]$n
## [1] 98 90 247 93 162 69 138
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 74.4654 76.68565 80.45682 80.54471 81.47934 80.0056 78.41763
## [2,] 80.4346 83.11435 83.54318 87.65529 86.32066 87.9944 83.58237
##
## [[1]]$out
## [1] 133.3 121.0 156.0 123.0 130.0 128.0 132.0 113.5 115.0 140.0 118.2 153.0
## [13] 114.0 156.5 125.0 138.9 133.0
##
## [[1]]$group
## [1] 1 1 1 1 2 3 3 3 3 3 3 3 3 4 5 6 6
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6" "7"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 31.5 44.60 43.00 38.30 44.50 35.80 38.5
## [2,] 62.7 64.30 67.55 63.00 65.00 65.35 62.0
## [3,] 73.0 71.50 75.00 70.25 74.50 77.25 71.7
## [4,] 84.0 85.75 84.60 81.00 85.85 87.00 83.0
## [5,] 113.0 115.00 108.00 107.80 114.50 116.90 112.2
##
## [[2]]$n
## [1] 243 187 103 262 284 144 94
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 70.84109 69.02164 72.34562 68.49297 72.54519 74.39942 68.27774
## [2,] 75.15891 73.97836 77.65438 72.00703 76.45481 80.10058 75.12226
##
## [[2]]$out
## [1] 128.5 123.0 128.0 156.0 134.0 124.3 130.6 126.3 128.0 122.0 120.0 128.6
## [13] 41.5 126.5 109.9 113.3 115.0 115.0 122.0 121.5 126.0 119.0 161.0 121.0
## [25] 150.6 121.0 122.0
##
## [[2]]$group
## [1] 1 1 1 1 1 1 1 2 2 2 2 3 3 4 4 4 4 4 5 5 5 5 5 5 6 6 6
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6" "7"
bbdd_clusters_8 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map(function(dat){
boxplot(dat$BMI~dat$clusters,col=viridis(8), main=paste("BMI DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 21.55 20.320 22.210 20.44 20.55 22.13 19.27 19.020
## [2,] 26.09 26.480 27.435 26.70 26.42 27.38 25.91 26.725
## [3,] 28.25 29.380 29.640 28.68 28.64 30.09 28.30 29.840
## [4,] 30.99 32.215 32.035 32.45 32.65 32.57 31.48 32.500
## [5,] 36.85 40.640 38.600 39.93 40.16 38.97 39.68 40.370
##
## [[1]]$n
## [1] 65 75 151 82 106 63 207 148
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 27.28972 28.33369 29.04854 27.67673 27.68392 29.05687 27.68832 29.08997
## [2,] 29.21028 30.42631 30.23146 29.68327 29.59608 31.12313 28.91168 30.59003
##
## [[1]]$out
## [1] 41.87 40.52 41.08 40.15 39.12 41.27 40.14 42.72 53.52 42.61 42.07 51.53
## [13] 45.88 49.45 41.74 44.69 49.96 42.31 40.12 16.31 43.25 41.80
##
## [[1]]$group
## [1] 1 1 3 3 3 3 3 4 4 5 5 5 6 6 6 7 7 7 7 7 8 8
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 14.00 19.560 19.74 17.970 16.75 20.45 18.27 19.02
## [2,] 26.62 27.890 28.20 26.745 27.56 28.20 26.83 27.61
## [3,] 30.83 31.495 31.43 30.240 30.70 32.05 31.11 31.84
## [4,] 35.49 35.210 34.19 35.050 35.96 35.56 34.30 36.21
## [5,] 47.36 44.850 42.06 46.660 47.25 44.36 45.06 48.87
##
## [[2]]$n
## [1] 261 162 93 184 257 113 134 113
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 29.96252 30.58632 30.44861 29.27264 29.87212 30.95605 30.09041 30.56175
## [2,] 31.69748 32.40368 32.41139 31.20736 31.52788 33.14395 32.12959 33.11825
##
## [[2]]$out
## [1] 51.47 51.86 51.93 50.94 52.34 59.14 49.10 48.91 49.34 56.14 46.80 49.95
## [13] 43.47 44.38 19.11 44.60 47.87 53.95 52.27 49.49 50.47 49.73 51.02 46.73
## [25] 49.05 45.92 54.65 51.86
##
## [[2]]$group
## [1] 1 1 1 1 1 1 1 1 2 2 2 2 3 3 3 3 4 4 5 5 5 5 5 6 7 7 8 8
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
bbdd_clusters_8 %>% group_by(sex_female) %>% group_split() %>% map(function(dat){
group_by(dat, clusters) %>%
summarise(
count = n(),
mean = mean(BMI, na.rm = TRUE),
sd = sd(BMI, na.rm = TRUE)
)})
## [[1]]
## # A tibble: 8 x 4
## clusters count mean sd
## <fct> <int> <dbl> <dbl>
## 1 1 65 28.7 3.98
## 2 2 75 29.4 4.21
## 3 3 151 29.8 3.90
## 4 4 82 30.1 5.29
## 5 5 106 29.7 5.06
## 6 6 63 30.7 5.00
## 7 7 207 28.9 4.67
## 8 8 148 30.0 4.32
##
## [[2]]
## # A tibble: 8 x 4
## clusters count mean sd
## <fct> <int> <dbl> <dbl>
## 1 1 261 31.6 6.75
## 2 2 162 32.0 6.07
## 3 3 93 31.7 5.49
## 4 4 184 31.3 6.23
## 5 5 257 31.9 6.17
## 6 6 113 31.9 5.52
## 7 7 134 31.1 5.84
## 8 8 113 32.5 6.60
bbdd_clusters_8 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map(function(dat){
res.aov <- aov(BMI ~ clusters, data = dat)
print(summary(res.aov))
print(TukeyHSD(res.aov))
})
## Df Sum Sq Mean Sq F value Pr(>F)
## clusters 7 270 38.62 1.872 0.071 .
## Residuals 889 18337 20.63
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = BMI ~ clusters, data = dat)
##
## $clusters
## diff lwr upr p adj
## 2-1 0.68571282 -1.6525874 3.0240131 0.9868721
## 3-1 1.12944880 -0.9174920 3.1763896 0.7026795
## 4-1 1.36813884 -0.9233547 3.6596323 0.6107179
## 5-1 1.02163861 -1.1521235 3.1954007 0.8442867
## 6-1 1.95063980 -0.4888646 4.3901442 0.2283958
## 7-1 0.21631958 -1.7455321 2.1781712 0.9999773
## 8-1 1.27234615 -0.7808282 3.3255205 0.5632104
## 3-2 0.44373598 -1.5054742 2.3929462 0.9972157
## 4-2 0.68242602 -1.5222046 2.8870566 0.9820009
## 5-2 0.33592579 -1.7460673 2.4179189 0.9997033
## 6-2 1.26492698 -1.0931729 3.6230269 0.7321970
## 7-2 -0.46939324 -2.3290480 1.3902615 0.9946933
## 8-2 0.58663333 -1.3691219 2.5423886 0.9850100
## 4-3 0.23869003 -1.6541161 2.1314961 0.9999434
## 5-3 -0.10781020 -1.8562421 1.6406217 0.9999996
## 6-3 0.82119100 -1.2483389 2.8907209 0.9303552
## 7-3 -0.91312922 -2.3898264 0.5635679 0.5660263
## 8-3 0.14289735 -1.4531282 1.7389229 0.9999946
## 5-4 -0.34650023 -2.3757833 1.6827828 0.9995680
## 6-4 0.58250097 -1.7291931 2.8941951 0.9947486
## 7-4 -1.15181925 -2.9522664 0.6486279 0.5210666
## 8-4 -0.09579268 -1.9953382 1.8037528 0.9999999
## 6-5 0.92900120 -1.2660452 3.1240476 0.9041321
## 7-5 -0.80531902 -2.4533206 0.8426826 0.8157250
## 8-5 0.25070755 -1.5050180 2.0064331 0.9998690
## 7-6 -1.73432022 -3.7197293 0.2510888 0.1384583
## 8-6 -0.67829365 -2.7539892 1.3974019 0.9754691
## 8-7 1.05602657 -0.4292992 2.5413523 0.3772698
##
## Df Sum Sq Mean Sq F value Pr(>F)
## clusters 7 191 27.31 0.711 0.663
## Residuals 1309 50259 38.39
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = BMI ~ clusters, data = dat)
##
## $clusters
## diff lwr upr p adj
## 2-1 0.47494679 -1.406553 2.356446 0.9947271
## 3-1 0.08974169 -2.181962 2.361445 1.0000000
## 4-1 -0.28961373 -2.100381 1.521153 0.9997223
## 5-1 0.35824217 -1.294822 2.011307 0.9979781
## 6-1 0.29871495 -1.819588 2.417018 0.9998805
## 7-1 -0.47246983 -2.471584 1.526644 0.9965002
## 8-1 0.91836097 -1.199942 3.036664 0.8928930
## 3-2 -0.38520510 -2.832480 2.062070 0.9997504
## 4-2 -0.76456052 -2.791232 1.262111 0.9466117
## 5-2 -0.11670462 -2.003804 1.770394 0.9999996
## 6-2 -0.17623184 -2.481818 2.129354 0.9999982
## 7-2 -0.94741662 -3.144000 1.249167 0.8954820
## 8-2 0.44341418 -1.862172 2.749000 0.9990640
## 4-3 -0.37935542 -2.772678 2.013967 0.9997385
## 5-3 0.26850048 -2.007843 2.544844 0.9999644
## 6-3 0.20897326 -2.424717 2.842663 0.9999977
## 7-3 -0.56221152 -3.101025 1.976602 0.9976798
## 8-3 0.82861928 -1.805071 3.462309 0.9803526
## 5-4 0.64785590 -1.168728 2.464440 0.9604330
## 6-4 0.58832868 -1.659907 2.836564 0.9934216
## 7-4 -0.18285610 -2.319165 1.953452 0.9999961
## 8-4 1.20797470 -1.040261 3.456210 0.7313511
## 6-5 -0.05952722 -2.182805 2.063751 1.0000000
## 7-5 -0.83071200 -2.835097 1.173673 0.9138581
## 8-5 0.56011880 -1.563159 2.683397 0.9930902
## 7-6 -0.77118478 -3.173714 1.631344 0.9779674
## 8-6 0.61964602 -1.882931 3.122223 0.9953209
## 8-7 1.39083080 -1.011698 3.793360 0.6491455
## [[1]]
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = BMI ~ clusters, data = dat)
##
## $clusters
## diff lwr upr p adj
## 2-1 0.68571282 -1.6525874 3.0240131 0.9868721
## 3-1 1.12944880 -0.9174920 3.1763896 0.7026795
## 4-1 1.36813884 -0.9233547 3.6596323 0.6107179
## 5-1 1.02163861 -1.1521235 3.1954007 0.8442867
## 6-1 1.95063980 -0.4888646 4.3901442 0.2283958
## 7-1 0.21631958 -1.7455321 2.1781712 0.9999773
## 8-1 1.27234615 -0.7808282 3.3255205 0.5632104
## 3-2 0.44373598 -1.5054742 2.3929462 0.9972157
## 4-2 0.68242602 -1.5222046 2.8870566 0.9820009
## 5-2 0.33592579 -1.7460673 2.4179189 0.9997033
## 6-2 1.26492698 -1.0931729 3.6230269 0.7321970
## 7-2 -0.46939324 -2.3290480 1.3902615 0.9946933
## 8-2 0.58663333 -1.3691219 2.5423886 0.9850100
## 4-3 0.23869003 -1.6541161 2.1314961 0.9999434
## 5-3 -0.10781020 -1.8562421 1.6406217 0.9999996
## 6-3 0.82119100 -1.2483389 2.8907209 0.9303552
## 7-3 -0.91312922 -2.3898264 0.5635679 0.5660263
## 8-3 0.14289735 -1.4531282 1.7389229 0.9999946
## 5-4 -0.34650023 -2.3757833 1.6827828 0.9995680
## 6-4 0.58250097 -1.7291931 2.8941951 0.9947486
## 7-4 -1.15181925 -2.9522664 0.6486279 0.5210666
## 8-4 -0.09579268 -1.9953382 1.8037528 0.9999999
## 6-5 0.92900120 -1.2660452 3.1240476 0.9041321
## 7-5 -0.80531902 -2.4533206 0.8426826 0.8157250
## 8-5 0.25070755 -1.5050180 2.0064331 0.9998690
## 7-6 -1.73432022 -3.7197293 0.2510888 0.1384583
## 8-6 -0.67829365 -2.7539892 1.3974019 0.9754691
## 8-7 1.05602657 -0.4292992 2.5413523 0.3772698
##
##
## [[2]]
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = BMI ~ clusters, data = dat)
##
## $clusters
## diff lwr upr p adj
## 2-1 0.47494679 -1.406553 2.356446 0.9947271
## 3-1 0.08974169 -2.181962 2.361445 1.0000000
## 4-1 -0.28961373 -2.100381 1.521153 0.9997223
## 5-1 0.35824217 -1.294822 2.011307 0.9979781
## 6-1 0.29871495 -1.819588 2.417018 0.9998805
## 7-1 -0.47246983 -2.471584 1.526644 0.9965002
## 8-1 0.91836097 -1.199942 3.036664 0.8928930
## 3-2 -0.38520510 -2.832480 2.062070 0.9997504
## 4-2 -0.76456052 -2.791232 1.262111 0.9466117
## 5-2 -0.11670462 -2.003804 1.770394 0.9999996
## 6-2 -0.17623184 -2.481818 2.129354 0.9999982
## 7-2 -0.94741662 -3.144000 1.249167 0.8954820
## 8-2 0.44341418 -1.862172 2.749000 0.9990640
## 4-3 -0.37935542 -2.772678 2.013967 0.9997385
## 5-3 0.26850048 -2.007843 2.544844 0.9999644
## 6-3 0.20897326 -2.424717 2.842663 0.9999977
## 7-3 -0.56221152 -3.101025 1.976602 0.9976798
## 8-3 0.82861928 -1.805071 3.462309 0.9803526
## 5-4 0.64785590 -1.168728 2.464440 0.9604330
## 6-4 0.58832868 -1.659907 2.836564 0.9934216
## 7-4 -0.18285610 -2.319165 1.953452 0.9999961
## 8-4 1.20797470 -1.040261 3.456210 0.7313511
## 6-5 -0.05952722 -2.182805 2.063751 1.0000000
## 7-5 -0.83071200 -2.835097 1.173673 0.9138581
## 8-5 0.56011880 -1.563159 2.683397 0.9930902
## 7-6 -0.77118478 -3.173714 1.631344 0.9779674
## 8-6 0.61964602 -1.882931 3.122223 0.9953209
## 8-7 1.39083080 -1.011698 3.793360 0.6491455
bbdd_clusters_8 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map2(bbdd_covar %>% group_by(sex_female) %>% group_split,
function(dat, weight){
dat <- dat %>% left_join(weight %>% select(weight, rowId), by="rowId")
boxplot(dat$weight~dat$clusters,col=viridis(8), main=paste("WEIGTH DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 55.5 52.00 58.00 56.50 58.00 61.50 44.40 57.0
## [2,] 70.5 72.75 75.55 73.00 73.50 74.75 70.00 75.5
## [3,] 78.8 80.50 83.50 82.85 80.15 84.00 79.00 83.9
## [4,] 88.7 91.10 90.45 91.80 93.50 95.95 88.75 94.0
## [5,] 113.0 113.00 110.50 119.50 123.00 116.40 115.20 121.3
##
## [[1]]$n
## [1] 65 75 151 82 106 63 207 148
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 75.23326 77.15218 81.58418 79.56974 77.08074 79.7799 76.94092 81.49731
## [2,] 82.36674 83.84782 85.41582 86.13026 83.21926 88.2201 81.05908 86.30269
##
## [[1]]$out
## [1] 121.0 130.0 121.0 118.2 52.0 114.4 122.1 116.0 125.0 114.0 126.3 156.5
## [13] 128.0 132.0 133.3 156.0 138.9 133.0 140.0 153.0 118.0 125.0
##
## [[1]]$group
## [1] 1 2 3 3 3 3 3 3 3 3 4 4 5 5 5 5 6 6 7 7 7 8
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 31.5 44.6 41.5 38.30 44.5 38.5 35.8 45.1
## [2,] 63.0 64.2 65.0 62.75 65.0 65.5 62.8 66.5
## [3,] 71.8 73.5 75.0 71.00 73.5 74.0 70.0 77.0
## [4,] 84.0 87.5 85.0 83.00 86.0 83.6 82.4 87.0
## [5,] 115.0 120.0 108.0 109.90 114.5 102.5 107.5 116.9
##
## [[2]]$n
## [1] 261 162 93 184 257 113 134 113
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 69.74621 70.60762 71.72323 68.6413 71.43029 71.30973 67.32477 73.95301
## [2,] 73.85379 76.39238 78.27677 73.3587 75.56971 76.69027 72.67523 80.04699
##
## [[2]]$out
## [1] 128.5 123.0 122.0 128.0 156.0 134.0 124.3 161.0 126.3 128.0 128.6 126.5
## [13] 115.0 115.0 122.0 121.5 126.0 119.0 130.6 121.0 112.2 111.0 114.0 121.0
## [25] 112.5 150.6 122.0
##
## [[2]]$group
## [1] 1 1 1 1 1 1 1 1 2 2 3 4 4 4 5 5 5 5 5 5 6 6 7 7 7 8 8
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
bbdd_clusters_8 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map2(bbdd_covar %>% group_by(sex_female) %>% group_split,
function(dat, weight){
group_by(dat, clusters) %>% left_join(weight %>% select(weight, rowId), by="rowId") %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(BMI, na.rm = TRUE)
)})
## [[1]]
## # A tibble: 8 x 4
## clusters count mean sd
## <fct> <int> <dbl> <dbl>
## 1 1 65 80.5 3.98
## 2 2 75 82.6 4.21
## 3 3 151 84.4 3.90
## 4 4 82 84.1 5.29
## 5 5 106 84.4 5.06
## 6 6 63 87.3 5.00
## 7 7 207 81.1 4.67
## 8 8 148 85.4 4.32
##
## [[2]]
## # A tibble: 8 x 4
## clusters count mean sd
## <fct> <int> <dbl> <dbl>
## 1 1 261 75.3 6.75
## 2 2 162 76.1 6.07
## 3 3 93 75.9 5.49
## 4 4 184 73.7 6.23
## 5 5 257 76.3 6.17
## 6 6 113 75.1 5.52
## 7 7 134 73.2 5.84
## 8 8 113 77.2 6.60
bbdd_clusters_8 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map2(bbdd_covar %>% group_by(sex_female) %>% group_split,
function(dat, weight){
dat <- dat %>% left_join(weight %>% select(weight, rowId), by="rowId")
res.aov <- aov(weight~ clusters, data = dat)
print(summary(res.aov))
print(TukeyHSD(res.aov))
})
## Df Sum Sq Mean Sq F value Pr(>F)
## clusters 7 3502 500.3 2.218 0.0308 *
## Residuals 889 200505 225.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ clusters, data = dat)
##
## $clusters
## diff lwr upr p adj
## 2-1 2.1591282 -5.5729629 9.8912193 0.9901872
## 3-1 3.8687927 -2.8998561 10.6374414 0.6631978
## 4-1 3.6648030 -3.9125116 11.2421176 0.8236326
## 5-1 3.8863861 -3.3016244 11.0743965 0.7241214
## 6-1 6.8263980 -1.2403462 14.8931423 0.1679714
## 7-1 0.6174954 -5.8697878 7.1047785 0.9999917
## 8-1 4.9035967 -1.8856646 11.6928579 0.3560847
## 3-2 1.7096645 -4.7358171 8.1551460 0.9928068
## 4-2 1.5056748 -5.7844091 8.7957586 0.9985036
## 5-2 1.7272579 -5.1572988 8.6118145 0.9948876
## 6-2 4.6672698 -3.1302932 12.4648328 0.6075796
## 7-2 -1.5416329 -7.6909799 4.6077142 0.9949115
## 8-2 2.7444685 -3.7226557 9.2115926 0.9028709
## 4-3 -0.2039897 -6.4629589 6.0549796 1.0000000
## 5-3 0.0175934 -5.7639716 5.7991584 1.0000000
## 6-3 2.9576054 -3.8857390 9.8009497 0.8939945
## 7-3 -3.2512973 -8.1343130 1.6317184 0.4668432
## 8-3 1.0348040 -4.2427966 6.3124046 0.9989307
## 5-4 0.2215831 -6.4886765 6.9318427 1.0000000
## 6-4 3.1615950 -4.4825172 10.8057073 0.9143170
## 7-4 -3.0473076 -9.0008723 2.9062571 0.7769133
## 8-4 1.2387937 -5.0424608 7.5200482 0.9988895
## 6-5 2.9400120 -4.3183797 10.1984037 0.9227518
## 7-5 -3.2688907 -8.7183614 2.1805799 0.6048645
## 8-5 1.0172106 -4.7884725 6.8228937 0.9994879
## 7-6 -6.2089027 -12.7740835 0.3562781 0.0793080
## 8-6 -1.9228014 -8.7865339 4.9409311 0.9899968
## 8-7 4.2861013 -0.6254468 9.1976494 0.1393397
##
## Df Sum Sq Mean Sq F value Pr(>F)
## clusters 7 1942 277.4 1.061 0.386
## Residuals 1309 342238 261.4
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ clusters, data = dat)
##
## $clusters
## diff lwr upr p adj
## 2-1 0.7464453 -4.163330 5.656220 0.9998020
## 3-1 0.5971821 -5.330830 6.525194 0.9999879
## 4-1 -1.6540948 -6.379293 3.071103 0.9642441
## 5-1 1.0207836 -3.292890 5.334457 0.9964723
## 6-1 -0.2377785 -5.765492 5.289935 1.0000000
## 7-1 -2.1461657 -7.362856 3.070525 0.9169357
## 8-1 1.9392127 -3.588500 7.466926 0.9638188
## 3-2 -0.1492632 -6.535430 6.236904 1.0000000
## 4-2 -2.4005401 -7.689141 2.888061 0.8673442
## 5-2 0.2743383 -4.650048 5.198725 0.9999998
## 6-2 -0.9842238 -7.000653 5.032205 0.9996773
## 7-2 -2.8926110 -8.624597 2.839375 0.7900780
## 8-2 1.1927674 -4.823662 7.209196 0.9988599
## 4-3 -2.2512769 -8.496654 3.994100 0.9580793
## 5-3 0.4236015 -5.516518 6.363721 0.9999989
## 6-3 -0.8349605 -7.707577 6.037656 0.9999564
## 7-3 -2.7433478 -9.368384 3.881688 0.9142252
## 8-3 1.3420306 -5.530585 8.214647 0.9989666
## 5-4 2.6748784 -2.065500 7.415257 0.6787825
## 6-4 1.4163164 -4.450456 7.283089 0.9960028
## 7-4 -0.4920709 -6.066770 5.082628 0.9999951
## 8-4 3.5933075 -2.273465 9.460080 0.5792778
## 6-5 -1.2585620 -6.799257 4.282133 0.9972739
## 7-5 -3.1669493 -8.397394 2.063495 0.5939767
## 8-5 0.9184291 -4.622266 6.459124 0.9996478
## 7-6 -1.9083873 -8.177788 4.361013 0.9837695
## 8-6 2.1769912 -4.353487 8.707469 0.9727208
## 8-7 4.0853784 -2.184022 10.354779 0.4970667
## [[1]]
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ clusters, data = dat)
##
## $clusters
## diff lwr upr p adj
## 2-1 2.1591282 -5.5729629 9.8912193 0.9901872
## 3-1 3.8687927 -2.8998561 10.6374414 0.6631978
## 4-1 3.6648030 -3.9125116 11.2421176 0.8236326
## 5-1 3.8863861 -3.3016244 11.0743965 0.7241214
## 6-1 6.8263980 -1.2403462 14.8931423 0.1679714
## 7-1 0.6174954 -5.8697878 7.1047785 0.9999917
## 8-1 4.9035967 -1.8856646 11.6928579 0.3560847
## 3-2 1.7096645 -4.7358171 8.1551460 0.9928068
## 4-2 1.5056748 -5.7844091 8.7957586 0.9985036
## 5-2 1.7272579 -5.1572988 8.6118145 0.9948876
## 6-2 4.6672698 -3.1302932 12.4648328 0.6075796
## 7-2 -1.5416329 -7.6909799 4.6077142 0.9949115
## 8-2 2.7444685 -3.7226557 9.2115926 0.9028709
## 4-3 -0.2039897 -6.4629589 6.0549796 1.0000000
## 5-3 0.0175934 -5.7639716 5.7991584 1.0000000
## 6-3 2.9576054 -3.8857390 9.8009497 0.8939945
## 7-3 -3.2512973 -8.1343130 1.6317184 0.4668432
## 8-3 1.0348040 -4.2427966 6.3124046 0.9989307
## 5-4 0.2215831 -6.4886765 6.9318427 1.0000000
## 6-4 3.1615950 -4.4825172 10.8057073 0.9143170
## 7-4 -3.0473076 -9.0008723 2.9062571 0.7769133
## 8-4 1.2387937 -5.0424608 7.5200482 0.9988895
## 6-5 2.9400120 -4.3183797 10.1984037 0.9227518
## 7-5 -3.2688907 -8.7183614 2.1805799 0.6048645
## 8-5 1.0172106 -4.7884725 6.8228937 0.9994879
## 7-6 -6.2089027 -12.7740835 0.3562781 0.0793080
## 8-6 -1.9228014 -8.7865339 4.9409311 0.9899968
## 8-7 4.2861013 -0.6254468 9.1976494 0.1393397
##
##
## [[2]]
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ clusters, data = dat)
##
## $clusters
## diff lwr upr p adj
## 2-1 0.7464453 -4.163330 5.656220 0.9998020
## 3-1 0.5971821 -5.330830 6.525194 0.9999879
## 4-1 -1.6540948 -6.379293 3.071103 0.9642441
## 5-1 1.0207836 -3.292890 5.334457 0.9964723
## 6-1 -0.2377785 -5.765492 5.289935 1.0000000
## 7-1 -2.1461657 -7.362856 3.070525 0.9169357
## 8-1 1.9392127 -3.588500 7.466926 0.9638188
## 3-2 -0.1492632 -6.535430 6.236904 1.0000000
## 4-2 -2.4005401 -7.689141 2.888061 0.8673442
## 5-2 0.2743383 -4.650048 5.198725 0.9999998
## 6-2 -0.9842238 -7.000653 5.032205 0.9996773
## 7-2 -2.8926110 -8.624597 2.839375 0.7900780
## 8-2 1.1927674 -4.823662 7.209196 0.9988599
## 4-3 -2.2512769 -8.496654 3.994100 0.9580793
## 5-3 0.4236015 -5.516518 6.363721 0.9999989
## 6-3 -0.8349605 -7.707577 6.037656 0.9999564
## 7-3 -2.7433478 -9.368384 3.881688 0.9142252
## 8-3 1.3420306 -5.530585 8.214647 0.9989666
## 5-4 2.6748784 -2.065500 7.415257 0.6787825
## 6-4 1.4163164 -4.450456 7.283089 0.9960028
## 7-4 -0.4920709 -6.066770 5.082628 0.9999951
## 8-4 3.5933075 -2.273465 9.460080 0.5792778
## 6-5 -1.2585620 -6.799257 4.282133 0.9972739
## 7-5 -3.1669493 -8.397394 2.063495 0.5939767
## 8-5 0.9184291 -4.622266 6.459124 0.9996478
## 7-6 -1.9083873 -8.177788 4.361013 0.9837695
## 8-6 2.1769912 -4.353487 8.707469 0.9727208
## 8-7 4.0853784 -2.184022 10.354779 0.4970667
bbdd_clusters_9 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map(function(dat){
boxplot(dat$BMI~dat$clusters,col=viridis(8), main=paste("BMI DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 21.550 20.830 21.160 20.320 20.55 22.13 19.270 19.02 20.44
## [2,] 25.880 26.495 26.810 26.590 26.42 26.73 26.145 27.22 26.57
## [3,] 27.970 29.210 29.095 29.445 28.75 30.09 28.680 30.08 28.49
## [4,] 30.485 32.535 31.360 32.560 32.82 32.79 31.535 33.27 31.66
## [5,] 34.810 38.400 37.720 40.520 42.07 41.74 38.600 41.80 38.20
##
## [[1]]$n
## [1] 75 63 138 122 71 58 167 118 85
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 27.12985 28.00767 28.48303 28.59101 27.54993 28.83277 28.021 29.20002
## [2,] 28.81015 30.41233 29.70697 30.29899 29.95007 31.34723 29.339 30.95998
## [,9]
## [1,] 27.6177
## [2,] 29.3623
##
## [[1]]$out
## [1] 40.64 37.55 40.04 42.72 38.37 44.69 40.15 39.12 51.53 41.27 40.14 40.16
## [13] 41.87 53.52 16.31 42.61 45.88 49.45 49.96 39.68 42.31 43.25 39.93
##
## [[1]]$group
## [1] 1 1 1 2 3 3 3 3 3 3 3 3 4 4 4 5 6 6 7 7 7 8 9
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 19.720 19.560 19.110 17.97 16.750 19.97 19.740 19.020 18.27
## [2,] 26.725 27.085 27.610 26.96 27.490 27.60 27.005 27.700 26.78
## [3,] 30.600 31.075 31.155 30.44 30.630 32.38 31.430 31.915 31.11
## [4,] 34.120 34.740 33.730 34.81 36.415 36.44 36.360 36.110 34.60
## [5,] 44.600 46.090 42.060 44.59 49.490 45.92 49.100 45.700 45.49
##
## [[2]]$n
## [1] 116 152 86 234 239 105 167 104 114
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 29.51516 30.09397 30.1123 29.62919 29.71785 31.01694 30.28622 30.61202
## [2,] 31.68484 32.05603 32.1977 31.25081 31.54215 33.74306 32.57378 33.21798
## [,9]
## [1,] 29.95279
## [2,] 32.26721
##
## [[2]]$out
## [1] 51.93 49.34 56.14 14.00 46.80 49.95 43.47 44.38 51.47 46.66 49.73 47.87
## [13] 50.94 47.36 53.95 48.91 52.27 50.47 51.02 51.86 52.34 59.14 54.65 51.86
## [25] 48.87 49.05
##
## [[2]]$group
## [1] 1 2 2 2 2 2 3 3 4 4 4 4 4 4 4 4 5 5 5 7 7 7 8 8 8 9
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9"
bbdd_clusters_9 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map2(bbdd_covar %>% group_by(sex_female) %>% group_split,
function(dat, weight){
dat <- dat %>% left_join(weight %>% select(weight, rowId), by="rowId")
boxplot(dat$weight~dat$clusters,col=viridis(8), main=paste("WEIGTH DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 59.10 59.1 52.00 44.4 52.0 61.5 51.20 57.0 55.5
## [2,] 71.25 74.0 73.70 74.0 74.3 73.0 71.70 75.5 71.1
## [3,] 78.50 81.2 82.85 82.7 84.0 84.0 79.00 85.3 79.9
## [4,] 84.95 92.5 90.00 94.0 95.0 95.5 89.25 97.0 88.0
## [5,] 102.00 113.0 114.40 121.0 110.5 116.4 115.20 125.0 110.0
##
## [[1]]$n
## [1] 75 63 138 122 71 58 167 118 85
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 76.00054 77.51737 80.65767 79.83907 80.11851 79.33206 76.85427 82.17281
## [2,] 80.99946 84.88263 85.04233 85.56093 87.88149 88.66794 81.14573 88.42719
## [,9]
## [1,] 77.00376
## [2,] 82.79624
##
## [[1]]$out
## [1] 112.0 108.5 130.0 140.0 156.0 122.1 116.0 123.0 126.3 156.5 125.0 128.0
## [13] 132.0 133.3 138.9 133.0 118.2 153.0 118.0 121.0 119.5
##
## [[1]]$group
## [1] 1 1 2 3 3 3 3 3 4 4 4 5 5 5 6 6 7 7 7 9 9
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 46.00 31.50 43.0 38.3 44.50 48.5 45.00 45.10 35.8
## [2,] 62.25 62.85 66.0 63.0 65.00 66.8 64.65 65.85 62.8
## [3,] 72.75 71.25 74.0 71.0 72.80 75.8 72.80 77.30 71.2
## [4,] 83.50 86.50 81.8 83.0 86.25 84.4 84.00 87.00 84.0
## [5,] 104.40 120.00 105.0 113.0 114.50 107.5 109.50 116.90 114.0
##
## [[2]]$n
## [1] 116 152 86 234 239 105 167 104 114
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 69.63264 68.21914 71.30806 68.93424 70.62821 73.08622 70.43419 74.02319
## [2,] 75.86736 74.28086 76.69194 73.06576 74.97179 78.51378 75.16581 80.57681
## [,9]
## [1,] 68.06281
## [2,] 74.33719
##
## [[2]]$out
## [1] 128.0 126.3 128.0 128.6 41.5 108.0 128.5 115.0 156.0 124.3 115.0 122.0
## [13] 121.5 126.0 119.0 130.6 121.0 112.5 112.2 126.5 123.0 122.0 121.0 134.0
## [25] 161.0 115.0 150.6 122.0
##
## [[2]]$group
## [1] 1 2 2 3 3 3 4 4 4 4 4 5 5 5 5 5 5 6 6 7 7 7 7 7 7 7 8 8
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9"
bbdd_clusters_10 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map(function(dat){
boxplot(dat$BMI~dat$clusters,col=viridis(8), main=paste("BMI DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 21.55 20.83 21.16 20.320 23.130 22.130 19.270 19.02 20.44 20.550
## [2,] 25.93 26.33 26.78 26.895 27.540 27.070 24.895 26.64 26.45 26.680
## [3,] 28.49 28.75 28.63 29.750 29.595 29.425 28.410 29.80 28.67 29.130
## [4,] 30.67 32.40 30.90 32.600 32.990 32.165 32.250 33.28 31.66 32.105
## [5,] 37.55 38.40 37.04 40.520 41.080 38.970 42.310 41.80 38.09 40.150
##
## [[1]]$n
## [1] 73 52 167 131 70 64 103 109 65 63
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 27.61346 27.42002 28.12627 28.96245 28.56579 28.41874 27.26496 28.79513
## [2,] 29.36654 30.07998 29.13373 30.53755 30.62421 30.43126 29.55504 30.80487
## [,9] [,10]
## [1,] 27.64897 28.05009
## [2,] 29.69103 30.20991
##
## [[1]]$out
## [1] 39.16 40.64 40.04 20.06 37.34 38.37 44.69 38.06 38.60 38.65 39.12 37.99
## [13] 42.72 53.52 41.27 16.31 42.61 42.07 45.88 49.45 41.74 49.96 43.25 41.87
## [25] 51.53
##
## [[1]]$group
## [1] 1 1 1 3 3 3 3 3 3 3 3 3 4 4 4 4 5 5 6 6 6 7 8 10 10
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 19.720 19.560 19.110 17.970 16.75 19.970 19.74 18.270 19.010 20.450
## [2,] 26.400 27.390 27.610 26.725 27.39 26.855 27.74 27.475 27.225 28.355
## [3,] 30.825 31.185 30.875 30.320 30.70 30.300 31.78 32.150 30.950 32.275
## [4,] 34.610 35.190 33.730 35.580 35.88 33.090 36.44 36.270 35.175 36.035
## [5,] 44.600 46.800 42.060 46.660 47.25 42.230 49.10 48.870 45.490 46.730
##
## [[2]]$n
## [1] 126 146 82 155 229 147 157 100 75 100
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 29.66938 30.16506 29.80717 29.19622 29.81356 29.48748 30.68295 30.76039
## [2,] 31.98062 32.20494 31.94283 31.44378 31.58644 31.11252 32.87705 33.53961
## [,9] [,10]
## [1,] 29.49958 31.06156
## [2,] 32.40042 33.48844
##
## [[2]]$out
## [1] 14.00 51.93 51.02 49.34 56.14 49.95 43.47 44.38 51.47 50.94 53.95 48.91
## [13] 52.27 49.49 50.47 49.73 45.92 42.64 47.36 43.42 45.06 43.86 43.86 51.86
## [25] 52.34 59.14 54.65 51.86 49.05 47.87
##
## [[2]]$group
## [1] 1 1 1 2 2 2 3 3 4 4 4 4 5 5 5 5 6 6 6 6 6 6 6 7 7
## [26] 7 8 8 9 10
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
bbdd_clusters_10 %>% group_by(sex_female, .add=TRUE) %>% group_split() %>% map2(bbdd_covar %>% group_by(sex_female) %>% group_split,
function(dat, weight){
dat <- dat %>% left_join(weight %>% select(weight, rowId), by="rowId")
boxplot(dat$weight~dat$clusters,col=viridis(8), main=paste("WEIGTH DISTRIBUTION IN", if_else(dat$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## [[1]]
## [[1]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 59.1 52.0 52.0 44.40 60.0 61.50 51.20 57.0 56.5 52.00
## [2,] 71.7 72.8 72.9 74.00 76.5 74.75 70.00 75.0 70.3 74.00
## [3,] 79.0 80.6 81.4 82.30 84.7 84.00 78.50 84.3 82.0 82.70
## [4,] 87.0 91.6 89.5 94.45 96.0 95.95 90.25 94.5 89.5 90.25
## [5,] 103.0 108.3 113.5 125.00 123.0 116.40 118.00 121.0 110.0 112.70
##
## [[1]]$n
## [1] 73 52 167 131 70 64 103 109 65 63
##
## [[1]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 76.17065 76.4808 79.37042 79.47697 81.0175 79.813 75.34744 81.34894
## [2,] 81.82935 84.7192 83.42958 85.12303 88.3825 88.187 81.65256 87.25106
## [,9] [,10]
## [1,] 78.23728 79.46525
## [2,] 85.76272 85.93475
##
## [[1]]$out
## [1] 121.3 112.0 113.0 130.0 115.0 140.0 118.2 114.4 126.3 156.5 128.0 132.0
## [13] 133.3 138.9 133.0 153.0 125.0 121.0 121.0 156.0
##
## [[1]]$group
## [1] 1 1 1 2 3 3 3 3 4 4 5 5 5 6 6 7 8 9 10 10
##
## [[1]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
##
##
## [[2]]
## [[2]]$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 31.5 44.6 41.5 38.30 44.4 47.50 45.3 45.10 44.50 48.50
## [2,] 61.0 63.5 65.0 63.00 64.8 62.00 65.0 67.25 63.60 66.45
## [3,] 72.7 71.5 74.0 71.90 73.0 70.00 75.0 77.95 72.00 74.25
## [4,] 85.0 86.5 81.6 84.75 86.0 80.65 86.5 87.00 84.55 84.00
## [5,] 105.0 120.0 105.0 115.00 114.5 107.50 115.0 109.80 106.50 102.50
##
## [[2]]$n
## [1] 126 146 82 155 229 147 157 100 75 100
##
## [[2]]$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 69.32182 68.49248 71.1036 69.13974 70.78652 67.5696 72.2889 74.8295
## [2,] 76.07818 74.50752 76.8964 74.66026 75.21348 72.4304 77.7111 81.0705
## [,9] [,10]
## [1,] 68.17783 71.4771
## [2,] 75.82217 77.0229
##
## [[2]]$out
## [1] 128.0 130.6 126.3 128.0 128.6 108.0 128.5 156.0 122.0 121.5 126.0 119.0
## [13] 121.0 124.3 112.5 112.2 126.5 123.0 122.0 121.0 134.0 161.0 150.6 116.9
## [25] 35.8 122.0 38.5 115.0 111.0
##
## [[2]]$group
## [1] 1 1 2 2 3 3 4 4 5 5 5 5 5 6 6 6 7 7 7 7 7 7 8 8 8
## [26] 8 10 10 10
##
## [[2]]$names
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10"
bmi_arch <- arch_maxprob %>% map(function(arch){
arch <- arch %>% select(rowId, name) %>% left_join(bbdd_covar %>% select(rowId, BMI, weight, sex_female), by = "rowId")
})
bmi_arch %>% map(function(arch){
boxplot(arch$BMI~arch$name,col=viridis(8), main=paste("BMI DISTRIBUTION IN", if_else(arch$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
boxplot(arch$weight~arch$name,col=viridis(8), main=paste("WEIGTH DISTRIBUTION IN", if_else(arch$sex_female[1]=="Men", "MALE", "FEMALE")),
xlab="CLUSTER", ylab="BMI")
})
## $Men
## $Men$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 57.00 55.5 51.2 52.0 59.20 52.0 59.0
## [2,] 75.10 73.2 71.0 72.6 73.00 73.0 74.0
## [3,] 83.30 82.0 78.5 80.6 84.10 81.0 82.5
## [4,] 92.75 91.8 88.0 91.2 95.25 91.2 93.0
## [5,] 113.00 115.3 113.0 118.2 116.40 118.2 121.0
##
## $Men$n
## [1] 76 177 178 170 87 343 105
##
## $Men$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 80.10114 79.79106 76.48676 78.34604 80.33099 79.44732 79.57035
## [2,] 86.49886 84.20894 80.51324 82.85396 87.86901 82.55268 85.42965
##
## $Men$out
## [1] 121.0 126.3 119.5 130.0 153.0 120.5 122.1 135.0 156.5 116.0 125.0 44.4
## [13] 140.0 138.9 133.0 128.0 132.0 121.3 133.3 120.5 121.0 119.4 156.0 125.0
## [25] 123.0 136.0 125.0
##
## $Men$group
## [1] 1 1 1 2 2 2 2 3 3 3 3 3 4 5 5 6 6 6 6 6 6 6 6 6 6 7 7
##
## $Men$names
## [1] "Cluster 1" "Cluster 2" "Cluster 3" "Cluster 4" "Cluster 5" "Cluster 6"
## [7] "Cluster 7"
##
##
## $Women
## $Women$stats
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 44.60 41.50 35.80 31.5 40.0 38.5 38.3 45.6
## [2,] 63.75 67.00 63.45 61.0 64.7 63.5 63.5 63.8
## [3,] 71.50 74.75 73.50 70.7 72.2 71.0 72.0 74.0
## [4,] 83.85 85.05 85.75 84.0 83.8 83.6 84.1 86.4
## [5,] 106.00 109.80 116.90 113.3 111.0 112.6 113.0 114.0
##
## $Women$n
## [1] 99 216 92 174 197 306 229 187
##
## $Women$conf
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 68.3082 72.80953 69.8266 67.94507 70.04991 69.18452 69.84917 71.38877
## [2,] 74.6918 76.69047 77.1734 73.45493 74.35009 72.81548 74.15083 76.61123
##
## $Women$out
## [1] 128.0 114.0 115.0 126.3 128.6 130.6 150.6 122.0 123.0 120.0 128.0 134.0
## [13] 122.0 115.0 119.0 128.5 114.5 126.0 124.3 115.0 156.0 121.0 126.5 121.5
## [25] 122.0 121.0 161.0
##
## $Women$group
## [1] 1 1 1 2 2 2 3 3 4 4 4 4 5 5 5 6 6 6 6 6 7 7 8 8 8 8 8
##
## $Women$names
## [1] "Cluster 1" "Cluster 2" "Cluster 3" "Cluster 4" "Cluster 5" "Cluster 6"
## [7] "Cluster 7" "Cluster 8"
bmi_arch %>% map(function(dat){
res.aov <- aov(BMI ~ name, data = dat)
summary(res.aov)
TukeyHSD(res.aov)
})
## $Men
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = BMI ~ name, data = dat)
##
## $name
## diff lwr upr p adj
## Cluster 2-Cluster 1 -1.05158935 -2.88640648 0.7832278 0.6210301
## Cluster 3-Cluster 1 -1.61268185 -3.44595008 0.2205864 0.1273831
## Cluster 4-Cluster 1 -1.14969969 -2.99582956 0.6964302 0.5214971
## Cluster 5-Cluster 1 0.07785541 -2.02279282 2.1785036 0.9999999
## Cluster 6-Cluster 1 -0.59328295 -2.28949096 1.1029251 0.9463420
## Cluster 7-Cluster 1 -0.47138596 -2.48633374 1.5435618 0.9931067
## Cluster 3-Cluster 2 -0.56109249 -1.98127283 0.8590878 0.9064018
## Cluster 4-Cluster 2 -0.09811034 -1.53485501 1.3386343 0.9999944
## Cluster 5-Cluster 2 1.12944477 -0.62234368 2.8812332 0.4777769
## Cluster 6-Cluster 2 0.45830640 -0.77990253 1.6965153 0.9301807
## Cluster 7-Cluster 2 0.58020339 -1.06784221 2.2282490 0.9446695
## Cluster 4-Cluster 3 0.46298215 -0.97178397 1.8977483 0.9635897
## Cluster 5-Cluster 3 1.69053726 -0.05962882 3.4407033 0.0662542
## Cluster 6-Cluster 3 1.01939889 -0.21651369 2.2553115 0.1844643
## Cluster 7-Cluster 3 1.14129588 -0.50502513 2.7876169 0.3851597
## Cluster 5-Cluster 4 1.22755510 -0.53607875 2.9911890 0.3800488
## Cluster 6-Cluster 4 0.55641674 -0.69849481 1.8113283 0.8475883
## Cluster 7-Cluster 4 0.67831373 -0.98231746 2.3389449 0.8917930
## Cluster 6-Cluster 5 -0.67113837 -2.27716778 0.9348910 0.8807302
## Cluster 7-Cluster 5 -0.54924138 -2.48888656 1.3904038 0.9811122
## Cluster 7-Cluster 6 0.12189699 -1.37029001 1.6140840 0.9999837
##
##
## $Women
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = BMI ~ name, data = dat)
##
## $name
## diff lwr upr p adj
## Cluster 2-Cluster 1 0.701506734 -1.566955 2.9699686 0.9822405
## Cluster 3-Cluster 1 0.021341678 -2.685269 2.7279527 1.0000000
## Cluster 4-Cluster 1 -0.223183560 -2.576118 2.1297506 0.9999920
## Cluster 5-Cluster 1 0.159603651 -2.142982 2.4621894 0.9999991
## Cluster 6-Cluster 1 0.005825906 -2.155249 2.1669011 1.0000000
## Cluster 7-Cluster 1 0.097775131 -2.150359 2.3459095 1.0000000
## Cluster 8-Cluster 1 0.520487225 -1.802599 2.8435732 0.9975057
## Cluster 3-Cluster 2 -0.680165056 -3.007052 1.6467217 0.9872237
## Cluster 4-Cluster 2 -0.924690294 -2.828622 0.9792419 0.8213092
## Cluster 5-Cluster 2 -0.541903083 -2.383250 1.2994442 0.9867018
## Cluster 6-Cluster 2 -0.695680828 -2.356676 0.9653142 0.9094052
## Cluster 7-Cluster 2 -0.603731603 -2.376517 1.1690534 0.9693187
## Cluster 8-Cluster 2 -0.181019509 -2.047939 1.6858997 0.9999907
## Cluster 4-Cluster 3 -0.244525237 -2.653837 2.1647863 0.9999873
## Cluster 5-Cluster 3 0.138261973 -2.221904 2.4984281 0.9999997
## Cluster 6-Cluster 3 -0.015515772 -2.237841 2.2068093 1.0000000
## Cluster 7-Cluster 3 0.076433454 -2.230641 2.3835075 1.0000000
## Cluster 8-Cluster 3 0.499145548 -1.881025 2.8793160 0.9983648
## Cluster 5-Cluster 4 0.382787210 -1.561677 2.3272511 0.9989129
## Cluster 6-Cluster 4 0.229009466 -1.545613 2.0036318 0.9999345
## Cluster 7-Cluster 4 0.320958691 -1.558708 2.2006253 0.9995722
## Cluster 8-Cluster 4 0.743670785 -1.225026 2.7123676 0.9463144
## Cluster 6-Cluster 5 -0.153777745 -1.861082 1.5535262 0.9999944
## Cluster 7-Cluster 5 -0.061828519 -1.878074 1.7544173 1.0000000
## Cluster 8-Cluster 5 0.360883574 -1.547354 2.2691208 0.9991628
## Cluster 7-Cluster 6 0.091949225 -1.541175 1.7250730 0.9999998
## Cluster 8-Cluster 6 0.514661319 -1.220191 2.2495141 0.9860611
## Cluster 8-Cluster 7 0.422712094 -1.419454 2.2648782 0.9970989
bmi_arch %>% map(function(dat){
res.aov <- aov(weight ~ name, data = dat)
print(summary(res.aov))
print(TukeyHSD(res.aov))
})
## Df Sum Sq Mean Sq F value Pr(>F)
## name 6 2569 428.2 1.895 0.0786 .
## Residuals 1129 255159 226.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ name, data = dat)
##
## $name
## diff lwr upr p adj
## Cluster 2-Cluster 1 -1.5070473 -7.5966096 4.582515 0.9906763
## Cluster 3-Cluster 1 -4.3494678 -10.4338895 1.734954 0.3464782
## Cluster 4-Cluster 1 -2.3205418 -8.4476499 3.806566 0.9224906
## Cluster 5-Cluster 1 1.1329099 -5.8389175 8.104737 0.9990953
## Cluster 6-Cluster 1 -0.9608102 -6.5903436 4.668723 0.9988037
## Cluster 7-Cluster 1 -0.3626566 -7.0500534 6.324740 0.9999986
## Cluster 3-Cluster 2 -2.8424205 -7.5558475 1.871007 0.5611042
## Cluster 4-Cluster 2 -0.8134945 -5.5818968 3.954908 0.9988065
## Cluster 5-Cluster 2 2.6399571 -3.1740418 8.453956 0.8323696
## Cluster 6-Cluster 2 0.5462371 -3.5632463 4.655720 0.9997155
## Cluster 7-Cluster 2 1.1443906 -4.3252969 6.614078 0.9962550
## Cluster 4-Cluster 3 2.0289260 -2.7329097 6.790762 0.8705860
## Cluster 5-Cluster 3 5.4823776 -0.3262369 11.290992 0.0789710
## Cluster 6-Cluster 3 3.3886576 -0.7132044 7.490520 0.1829158
## Cluster 7-Cluster 3 3.9868111 -1.4771527 9.450775 0.3211366
## Cluster 5-Cluster 4 3.4534517 -2.3998610 9.306764 0.5873279
## Cluster 6-Cluster 4 1.3597316 -2.8051859 5.524649 0.9614550
## Cluster 7-Cluster 4 1.9578852 -3.5535726 7.469343 0.9422810
## Cluster 6-Cluster 5 -2.0937200 -7.4239603 3.236520 0.9087663
## Cluster 7-Cluster 5 -1.4955665 -7.9330419 4.941909 0.9933613
## Cluster 7-Cluster 6 0.5981535 -4.3542559 5.550563 0.9998371
##
## Df Sum Sq Mean Sq F value Pr(>F)
## name 7 1258 179.7 0.7 0.672
## Residuals 1492 382891 256.6
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ name, data = dat)
##
## $name
## diff lwr upr p adj
## Cluster 2-Cluster 1 2.36296296 -3.538399 8.264325 0.9275456
## Cluster 3-Cluster 1 0.84637681 -6.194822 7.887575 0.9999596
## Cluster 4-Cluster 1 -0.20977011 -6.330885 5.911345 1.0000000
## Cluster 5-Cluster 1 0.02445008 -5.965684 6.014584 1.0000000
## Cluster 6-Cluster 1 -0.02418301 -5.646180 5.597814 1.0000000
## Cluster 7-Cluster 1 0.64425036 -5.204230 6.492730 0.9999777
## Cluster 8-Cluster 1 1.77237077 -4.271095 7.815836 0.9869730
## Cluster 3-Cluster 2 -1.51658615 -7.569939 4.536767 0.9949758
## Cluster 4-Cluster 2 -2.57273308 -7.525778 2.380311 0.7644598
## Cluster 5-Cluster 2 -2.33851288 -7.128744 2.451718 0.8173443
## Cluster 6-Cluster 2 -2.38714597 -6.708194 1.933902 0.7024321
## Cluster 7-Cluster 2 -1.71871260 -6.330580 2.893155 0.9500040
## Cluster 8-Cluster 2 -0.59059220 -5.447348 4.266164 0.9999563
## Cluster 4-Cluster 3 -1.05614693 -7.323926 5.211632 0.9996081
## Cluster 5-Cluster 3 -0.82192673 -6.961855 5.318002 0.9999161
## Cluster 6-Cluster 3 -0.87055982 -6.651897 4.910778 0.9998147
## Cluster 7-Cluster 3 -0.20212645 -6.203937 5.799684 1.0000000
## Cluster 8-Cluster 3 0.92599395 -5.265976 7.117963 0.9998231
## Cluster 5-Cluster 4 0.23422020 -4.824267 5.292707 0.9999999
## Cluster 6-Cluster 4 0.18558711 -4.431060 4.802234 1.0000000
## Cluster 7-Cluster 4 0.85402048 -4.035898 5.743939 0.9995034
## Cluster 8-Cluster 4 1.98214088 -3.139388 7.103669 0.9390945
## Cluster 6-Cluster 5 -0.04863309 -4.490153 4.392887 1.0000000
## Cluster 7-Cluster 5 0.61980028 -4.105130 5.344730 0.9999269
## Cluster 8-Cluster 5 1.74792068 -3.216323 6.712165 0.9631432
## Cluster 7-Cluster 6 0.66843337 -3.580108 4.916975 0.9997516
## Cluster 8-Cluster 6 1.79655377 -2.716634 6.309741 0.9296638
## Cluster 8-Cluster 7 1.12812040 -3.664241 5.920482 0.9965929
## $Men
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ name, data = dat)
##
## $name
## diff lwr upr p adj
## Cluster 2-Cluster 1 -1.5070473 -7.5966096 4.582515 0.9906763
## Cluster 3-Cluster 1 -4.3494678 -10.4338895 1.734954 0.3464782
## Cluster 4-Cluster 1 -2.3205418 -8.4476499 3.806566 0.9224906
## Cluster 5-Cluster 1 1.1329099 -5.8389175 8.104737 0.9990953
## Cluster 6-Cluster 1 -0.9608102 -6.5903436 4.668723 0.9988037
## Cluster 7-Cluster 1 -0.3626566 -7.0500534 6.324740 0.9999986
## Cluster 3-Cluster 2 -2.8424205 -7.5558475 1.871007 0.5611042
## Cluster 4-Cluster 2 -0.8134945 -5.5818968 3.954908 0.9988065
## Cluster 5-Cluster 2 2.6399571 -3.1740418 8.453956 0.8323696
## Cluster 6-Cluster 2 0.5462371 -3.5632463 4.655720 0.9997155
## Cluster 7-Cluster 2 1.1443906 -4.3252969 6.614078 0.9962550
## Cluster 4-Cluster 3 2.0289260 -2.7329097 6.790762 0.8705860
## Cluster 5-Cluster 3 5.4823776 -0.3262369 11.290992 0.0789710
## Cluster 6-Cluster 3 3.3886576 -0.7132044 7.490520 0.1829158
## Cluster 7-Cluster 3 3.9868111 -1.4771527 9.450775 0.3211366
## Cluster 5-Cluster 4 3.4534517 -2.3998610 9.306764 0.5873279
## Cluster 6-Cluster 4 1.3597316 -2.8051859 5.524649 0.9614550
## Cluster 7-Cluster 4 1.9578852 -3.5535726 7.469343 0.9422810
## Cluster 6-Cluster 5 -2.0937200 -7.4239603 3.236520 0.9087663
## Cluster 7-Cluster 5 -1.4955665 -7.9330419 4.941909 0.9933613
## Cluster 7-Cluster 6 0.5981535 -4.3542559 5.550563 0.9998371
##
##
## $Women
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ name, data = dat)
##
## $name
## diff lwr upr p adj
## Cluster 2-Cluster 1 2.36296296 -3.538399 8.264325 0.9275456
## Cluster 3-Cluster 1 0.84637681 -6.194822 7.887575 0.9999596
## Cluster 4-Cluster 1 -0.20977011 -6.330885 5.911345 1.0000000
## Cluster 5-Cluster 1 0.02445008 -5.965684 6.014584 1.0000000
## Cluster 6-Cluster 1 -0.02418301 -5.646180 5.597814 1.0000000
## Cluster 7-Cluster 1 0.64425036 -5.204230 6.492730 0.9999777
## Cluster 8-Cluster 1 1.77237077 -4.271095 7.815836 0.9869730
## Cluster 3-Cluster 2 -1.51658615 -7.569939 4.536767 0.9949758
## Cluster 4-Cluster 2 -2.57273308 -7.525778 2.380311 0.7644598
## Cluster 5-Cluster 2 -2.33851288 -7.128744 2.451718 0.8173443
## Cluster 6-Cluster 2 -2.38714597 -6.708194 1.933902 0.7024321
## Cluster 7-Cluster 2 -1.71871260 -6.330580 2.893155 0.9500040
## Cluster 8-Cluster 2 -0.59059220 -5.447348 4.266164 0.9999563
## Cluster 4-Cluster 3 -1.05614693 -7.323926 5.211632 0.9996081
## Cluster 5-Cluster 3 -0.82192673 -6.961855 5.318002 0.9999161
## Cluster 6-Cluster 3 -0.87055982 -6.651897 4.910778 0.9998147
## Cluster 7-Cluster 3 -0.20212645 -6.203937 5.799684 1.0000000
## Cluster 8-Cluster 3 0.92599395 -5.265976 7.117963 0.9998231
## Cluster 5-Cluster 4 0.23422020 -4.824267 5.292707 0.9999999
## Cluster 6-Cluster 4 0.18558711 -4.431060 4.802234 1.0000000
## Cluster 7-Cluster 4 0.85402048 -4.035898 5.743939 0.9995034
## Cluster 8-Cluster 4 1.98214088 -3.139388 7.103669 0.9390945
## Cluster 6-Cluster 5 -0.04863309 -4.490153 4.392887 1.0000000
## Cluster 7-Cluster 5 0.61980028 -4.105130 5.344730 0.9999269
## Cluster 8-Cluster 5 1.74792068 -3.216323 6.712165 0.9631432
## Cluster 7-Cluster 6 0.66843337 -3.580108 4.916975 0.9997516
## Cluster 8-Cluster 6 1.79655377 -2.716634 6.309741 0.9296638
## Cluster 8-Cluster 7 1.12812040 -3.664241 5.920482 0.9965929